This is a protocol for the analysis of the data preformed in R and the packages being used in Rstudio. Read through the code and comments carefully. The protocole displayes the output in the terminal and the plots generated by the code.

If there is any questions feel free to mail:

# loading necessary library
library(pathview)
## 
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
## 
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
library(clusterProfiler)
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   inner_join.phylo    tidytree
##   inner_join.treedata tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## clusterProfiler v4.8.3  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(limma)
library(edgeR)
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## 
library(AnnotationDbi)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.3     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%()   masks IRanges::%within%()
## ✖ IRanges::collapse()     masks dplyr::collapse()
## ✖ Biobase::combine()      masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc()         masks dplyr::desc()
## ✖ tidyr::expand()         masks S4Vectors::expand()
## ✖ dplyr::filter()         masks clusterProfiler::filter(), stats::filter()
## ✖ S4Vectors::first()      masks dplyr::first()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::Position()     masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()         masks IRanges::reduce()
## ✖ S4Vectors::rename()     masks dplyr::rename(), clusterProfiler::rename()
## ✖ lubridate::second()     masks S4Vectors::second()
## ✖ lubridate::second<-()   masks S4Vectors::second<-()
## ✖ AnnotationDbi::select() masks dplyr::select(), clusterProfiler::select()
## ✖ purrr::simplify()       masks clusterProfiler::simplify()
## ✖ IRanges::slice()        masks dplyr::slice(), clusterProfiler::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(biomaRt)
library(readxl)
library(affy)
## 
## Attaching package: 'affy'
## 
## The following object is masked from 'package:lubridate':
## 
##     pm
library(ggplot2)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:IRanges':
## 
##     collapse
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.8-42. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## 
## Attaching package: 'genefilter'
## 
## The following object is masked from 'package:readr':
## 
##     spec
## 
## Loading required package: BiocParallel
library(gplots)
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:IRanges':
## 
##     space
## 
## The following object is masked from 'package:S4Vectors':
## 
##     space
## 
## The following object is masked from 'package:stats':
## 
##     lowess
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
## 
## Attaching package: 'futile.logger'
## 
## The following object is masked from 'package:mgcv':
## 
##     scat
library(eulerr)
## Registered S3 method overwritten by 'eulerr':
##   method    from  
##   plot.venn gplots
## 
## Attaching package: 'eulerr'
## 
## The following object is masked from 'package:gplots':
## 
##     venn
library(UpSetR)
library(pheatmap)
library(writexl)
## Warning: package 'writexl' was built under R version 4.3.2
#  Load file with Counts per million after normalization data
rawCounts <-
  read.table("raw_counts_cpm_after_norm_reorganised_220722.txt", header = T)

# Converting ensemblID to symbol
rawCounts$GeneNames <- mapIds(
  org.Mm.eg.db,
  keys = rawCounts$ensembl_id,
  keytype = "ENSEMBL",
  column = "SYMBOL"
)
## 'select()' returned 1:many mapping between keys and columns
# Removing NA values
rawCounts <- na.omit(rawCounts)

# Keep only rows where all values are above 1
rawCounts <- rawCounts[apply(rawCounts, 1, function(x) all(x > 1)), ]


# Removing ENsemblID column
rawCounts <- rawCounts[, -1]
# Order the columns
rawCounts <- rawCounts[order(colnames(rawCounts))]

# Build a separated dataframe for the ribotag and totalRNA for astrocyte and microglia sep.
AC_RT <-
  subset(rawCounts,
         !(rownames(rawCounts) %in% "16709"),
         select = c(1, 2, 6, 7, 8, 12, 13, 14, 18))



MG_RT <-
  subset(rawCounts,
         !(rownames(rawCounts) %in% "16709"),
         select = c(19, 20, 21, 25, 26, 27, 31, 32, 33, 18))


# Check the variance of the gene expression data
AC_RT_var <- sapply(AC_RT, var)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
MG_RT_var <- sapply(MG_RT, var)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Plot the variance distribution to check if it is homogeneous
hist(AC_RT_var,
     main = "Histogram of Ribotag Astrocyte Values Distribution",
     xlab = "Values",
     col = "lightblue") # plot histogram

hist(MG_RT_var,
     main = "Histogram of Ribotag Microglia Values Distribution",
     xlab = "Values",
     col = "lightblue") # plot histogram

# Make object to  store p-values
PvalueAC_FM <- data.frame(Pvalue = rep(0, nrow(AC_RT)))
PvalueAC_FR <- data.frame(Pvalue = rep(0, nrow(AC_RT)))
PvalueAC_MR <- data.frame(Pvalue = rep(0, nrow(AC_RT)))

PvalueMG_FM <- data.frame(Pvalue = rep(0, nrow(MG_RT)))
PvalueMG_FR <- data.frame(Pvalue = rep(0, nrow(MG_RT)))
PvalueMG_MR <- data.frame(Pvalue = rep(0, nrow(MG_RT)))


for (i in 1:nrow(AC_RT)) {
  PvalueAC_FM$Pvalue[i] <-
    t.test(AC_RT[i, 1:2], AC_RT[i, 3:5], alternative = "two.sided")$p.value
  PvalueAC_FR$Pvalue[i] <-
    t.test(AC_RT[i, 1:2], AC_RT[i, 6:8], alternative = "two.sided")$p.value
  PvalueAC_MR$Pvalue[i] <-
    t.test(AC_RT[i, 3:5], AC_RT[i, 6:8], alternative = "two.sided")$p.value
}

for (i in 1:nrow(MG_RT)) {
  PvalueMG_FM$Pvalue[i] <-
    t.test(MG_RT[i, 1:3], MG_RT[i, 4:6], alternative = "two.sided")$p.value
  PvalueMG_FR$Pvalue[i] <-
    t.test(MG_RT[i, 1:3], MG_RT[i, 7:9], alternative = "two.sided")$p.value
  PvalueMG_MR$Pvalue[i] <-
    t.test(MG_RT[i, 4:6], MG_RT[i, 7:9], alternative = "two.sided")$p.value
}

# add the column genename
PvalueAC_FM$GeneNames <- AC_RT$GeneNames
PvalueAC_FR$GeneNames <- AC_RT$GeneNames
PvalueAC_MR$GeneNames <- AC_RT$GeneNames

PvalueMG_FM$GeneNames <- MG_RT$GeneNames
PvalueMG_FR$GeneNames <- MG_RT$GeneNames
PvalueMG_MR$GeneNames <- MG_RT$GeneNames


# make a FClog column into same pValobject and filter based onm p-value
# For AC comparisons
PvalueAC_FM$log2FC <- rowMeans(log2(AC_RT[, 1:2])) - rowMeans(log2(AC_RT[, 3:5]))
PvalueAC_FR$log2FC <- rowMeans(log2(AC_RT[, 1:2])) - rowMeans(log2(AC_RT[, 6:8]))
PvalueAC_MR$log2FC <- rowMeans(log2(AC_RT[, 3:5])) - rowMeans(log2(AC_RT[, 6:8]))

# For MG comparisons
PvalueMG_FM$log2FC <- rowMeans(log2(MG_RT[, 1:3])) - rowMeans(log2(MG_RT[, 4:6]))
PvalueMG_FR$log2FC <- rowMeans(log2(MG_RT[, 1:3])) - rowMeans(log2(MG_RT[, 7:9]))
PvalueMG_MR$log2FC <- rowMeans(log2(MG_RT[, 4:6])) - rowMeans(log2(MG_RT[, 7:9]))


#Removing the P-values over 0.05 for all p-values objects
#AC
sign_PvalueAC_FM <-
  PvalueAC_FM[PvalueAC_FM$Pvalue < 0.05, , drop = F]
sign_PvalueAC_FM <- na.omit(sign_PvalueAC_FM)

sign_PvalueAC_FR <-
  PvalueAC_FR[PvalueAC_FR$Pvalue < 0.05, , drop = F]
sign_PvalueAC_FR <- na.omit(sign_PvalueAC_FR)

sign_PvalueAC_MR <-
  PvalueAC_MR[PvalueAC_MR$Pvalue < 0.05, , drop = F]
sign_PvalueAC_MR <- na.omit(sign_PvalueAC_MR)

# MG
sign_PvalueMG_FM <-
  PvalueMG_FM[PvalueMG_FM$Pvalue < 0.05, , drop = F]
sign_PvalueMG_FM <- na.omit(sign_PvalueMG_FM)

sign_PvalueMG_FR <-
  PvalueMG_FR[PvalueMG_FR$Pvalue < 0.05, , drop = F]
sign_PvalueMG_FR <- na.omit(sign_PvalueMG_FR)

sign_PvalueMG_MR <-
  PvalueMG_MR[PvalueMG_MR$Pvalue < 0.05, , drop = F]
sign_PvalueMG_MR <- na.omit(sign_PvalueMG_MR)




# Convert gene symbols to ENTREZIDs
Deg_AC_FM <-
  bitr(
    sign_PvalueAC_FM$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
Deg_AC_FR <-
  bitr(
    sign_PvalueAC_FR$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
Deg_AC_MR <-
  bitr(
    sign_PvalueAC_MR$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
Deg_MG_FM <-
  bitr(
    sign_PvalueMG_FM$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
Deg_MG_FR <-
  bitr(
    sign_PvalueMG_FR$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
Deg_MG_MR <-
  bitr(
    sign_PvalueMG_MR$GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  )
## 'select()' returned 1:1 mapping between keys and columns
# One gene was converted twice by bitr in the sign_PvslueMG_FR object, thus it contains duplicates
# Remove one of the duplicates
sign_PvalueMG_FR <-
  sign_PvalueMG_FR[!duplicated(sign_PvalueMG_FR$GeneNames),]
# Same
sign_PvalueAC_MR <-
  sign_PvalueAC_MR[!duplicated(sign_PvalueAC_MR$GeneNames),]

##
# TOO do
# Pathway analysis


# create a background gene file on all of the genes
Backround_Genes <-
  data.frame(rawCounts$GeneNames) # create a data frame with the names of all measured genes

Backround_Genes <-
  bitr(
    Backround_Genes$rawCounts.GeneNames,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = "org.Mm.eg.db"
  ) # convert symbol to ENTREZID
## 'select()' returned 1:1 mapping between keys and columns
table(duplicated(Backround_Genes$ENTREZID)) #check if duplicated entrez ID
## 
## FALSE 
## 10677
# AC
keggAstrocyte_FM <- enrichKEGG(
  gene = Deg_AC_FM$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/mmu/pathway"...
## Warning in utils::download.file(url, quiet = TRUE, method = method, ...): the
## 'wininet' method is deprecated for http:// and https:// URLs
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/mmu"...
## Warning in utils::download.file(url, quiet = TRUE, method = method, ...): the
## 'wininet' method is deprecated for http:// and https:// URLs
keggAstrocyte_FM <-
  setReadable(keggAstrocyte_FM, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggAstrocyte_summary_FM <- keggAstrocyte_FM@result
keggAstrocyte_summary_FM <-
  keggAstrocyte_summary_FM[keggAstrocyte_summary_FM$pvalue < 0.05, , drop = F]

#
keggAstrocyte_FR <- enrichKEGG(
  gene = Deg_AC_FR$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)


keggAstrocyte_FM <-
  setReadable(keggAstrocyte_FR, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggAstrocyte_summary_FR <- keggAstrocyte_FR@result
keggAstrocyte_summary_FR <-
  keggAstrocyte_summary_FR[keggAstrocyte_summary_FR$pvalue < 0.05, , drop = F]

#
keggAstrocyte_MR <- enrichKEGG(
  gene = Deg_AC_MR$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)


keggAstrocyte_MR <-
  setReadable(keggAstrocyte_MR, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggAstrocyte_summary_MR <- keggAstrocyte_MR@result
keggAstrocyte_summary_MR <-
  keggAstrocyte_summary_MR[keggAstrocyte_summary_MR$pvalue < 0.05, , drop = F]

# KEGG pathway analysis
# MG
keggMicroglia_FM <- enrichKEGG(
  gene = Deg_MG_FM$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)


keggMicroglia_FM <-
  setReadable(keggMicroglia_FM, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggMicroglia_summary_FM <- keggMicroglia_FM@result
keggMicroglia_summary_FM <-
  keggMicroglia_summary_FM[keggMicroglia_summary_FM$pvalue < 0.05, , drop = F]

#
keggMicroglia_FR <- enrichKEGG(
  gene = Deg_MG_FR$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)


keggMicroglia_FM <-
  setReadable(keggMicroglia_FR, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggMicroglia_summary_FR <- keggMicroglia_FR@result
keggMicroglia_summary_FR <-
  keggMicroglia_summary_FR[keggMicroglia_summary_FR$pvalue < 0.05, , drop = F]

#
keggMicroglia_MR <- enrichKEGG(
  gene = Deg_MG_MR$ENTREZID,
  organism = "mmu",
  keyType = "kegg",
  universe = Backround_Genes$ENTREZID,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 1
)


keggMicroglia_MR <-
  setReadable(keggMicroglia_MR, OrgDb = org.Mm.eg.db, keyType = "ENTREZID")


keggMicroglia_summary_MR <- keggMicroglia_MR@result
keggMicroglia_summary_MR <-
  keggMicroglia_summary_MR[keggMicroglia_summary_MR$pvalue < 0.05, , drop = F]
########################

create_custom_dot_plot <- function(Kegg_result) {
  # Create a new column 'geneRatioDecimal' in our dataframe which holds the numeric ratio
  Kegg_result$GeneRatioDecimal <-
    as.numeric(unlist(lapply(strsplit(as.character(Kegg_result$GeneRatio), "/"), function(x)
      as.numeric(x[1]) / as.numeric(x[2]))))
  
  dot_plot <-
    ggplot(Kegg_result,
           aes(
             x = reorder(Description,-GeneRatioDecimal),
             y = GeneRatioDecimal,
             color = pvalue,
             size = Count
           )) +
    geom_point() +
    scale_color_gradient(low = "blue", high = "red") +
    theme_minimal() +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(
        size = 8,
        angle = 75,
        hjust = 1
      ),
      plot.title = element_text(size = 12, hjust = 0.5),
      panel.background = element_rect(fill = "gray90"),
      plot.background = element_rect(fill = "white")
    ) +
    labs(title = "Kegg Enrichment plot",
         y = "GeneRatio",
         x = "Description")
  
  
  return(dot_plot)
}

dot_plot_AC_FM <- create_custom_dot_plot(keggAstrocyte_summary_FM)
dot_plot_AC_FR <- create_custom_dot_plot(keggAstrocyte_summary_FR)
dot_plot_AC_MR <- create_custom_dot_plot(keggAstrocyte_summary_MR)

dot_plot_MG_FM <- create_custom_dot_plot(keggMicroglia_summary_FM)
dot_plot_MG_FR <- create_custom_dot_plot(keggMicroglia_summary_FR)
dot_plot_MG_MR <- create_custom_dot_plot(keggMicroglia_summary_MR)



#AC
log2fc_AC_FM <- data.frame(sign_PvalueAC_FM$log2FC)
rownames(log2fc_AC_FM) <- Deg_AC_FM$ENTREZID
log2fc_AC_FM <- na.omit(log2fc_AC_FM)

log2fc_AC_FR <- data.frame(sign_PvalueAC_FR$log2FC)
rownames(log2fc_AC_FR) <- Deg_AC_FR$ENTREZID
log2fc_AC_FR <- na.omit(log2fc_AC_FR)


log2fc_AC_MR <- data.frame(sign_PvalueAC_MR$log2FC)
rownames(log2fc_AC_MR) <- Deg_AC_MR$ENTREZID
log2fc_AC_MR <- na.omit(log2fc_AC_MR)


# MG
log2fc_MG_FM <- data.frame(sign_PvalueMG_FM$log2FC)
rownames(log2fc_MG_FM) <- Deg_MG_FM$ENTREZID
log2fc_MG_FM <- na.omit(log2fc_MG_FM)


log2fc_MG_FR <- data.frame(sign_PvalueMG_FR$log2FC)
rownames(log2fc_MG_FR) <- Deg_MG_FR$ENTREZID
log2fc_MG_FR <- na.omit(log2fc_MG_FR)


log2fc_MG_MR <- data.frame(sign_PvalueMG_MR$log2FC)
rownames(log2fc_MG_MR) <- Deg_MG_MR$ENTREZID
log2fc_MG_MR <- na.omit(log2fc_MG_MR)

# Pathview analysis
# AC_FM
pathview(
  gene.data = log2fc_AC_FM,
  pathway.id = "mmu05033",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05033.pathview.png
pathview(
  gene.data = log2fc_AC_FM,
  pathway.id = "mmu04080",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04080.pathview.png
pathview(
  gene.data = log2fc_AC_FM,
  pathway.id = "mmu04659",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04659.pathview.png
pathview(
  gene.data = log2fc_AC_FM,
  pathway.id = "mmu05208",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05208.pathview.png
pathview(
  gene.data = log2fc_AC_FM,
  pathway.id = "mmu04060",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04060.pathview.png
# MG_FM
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu05031",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05031.pathview.png
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu04728",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04728.pathview.png
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu05030",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05030.pathview.png
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu04024",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04024.pathview.png
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu05033",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05033.pathview.png
pathview(
  gene.data = log2fc_MG_FM,
  pathway.id = "mmu04024",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04024.pathview.png
# AC_FR
pathview(
  gene.data = log2fc_AC_FR,
  pathway.id = "mmu04080",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04080.pathview.png
pathview(
  gene.data = log2fc_AC_FR,
  pathway.id = "mmu04934",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04934.pathview.png
pathview(
  gene.data = log2fc_AC_FR,
  pathway.id = "mmu04727",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04727.pathview.png
pathview(
  gene.data = log2fc_AC_FR,
  pathway.id = "mmu00900",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu00900.pathview.png
pathview(
  gene.data = log2fc_AC_FR,
  pathway.id = "mmu00562",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu00562.pathview.png
# MG_FR
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu04724",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04724.pathview.png
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu04020",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04020.pathview.png
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu04072",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04072.pathview.png
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu05031",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05031.pathview.png
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu04713",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04713.pathview.png
pathview(
  gene.data = log2fc_MG_FR,
  pathway.id = "mmu04080",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04080.pathview.png
# AC_MR
pathview(
  gene.data = log2fc_AC_MR,
  pathway.id = "mmu04080",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04080.pathview.png
pathview(
  gene.data = log2fc_AC_MR,
  pathway.id = "mmu04724",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04724.pathview.png
pathview(
  gene.data = log2fc_AC_MR,
  pathway.id = "mmu05033",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05033.pathview.png
pathview(
  gene.data = log2fc_AC_MR,
  pathway.id = "mmu04727",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04727.pathview.png
# MG_MR
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu04020",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04020.pathview.png
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu04970",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04970.pathview.png
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu04724",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04724.pathview.png
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu04024",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04024.pathview.png
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu04925",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu04925.pathview.png
pathview(
  gene.data = log2fc_MG_MR,
  pathway.id = "mmu05414",
  low = "red",
  mid = "grey",
  high = "green",
  species = "mmu",
  limit = c(-10, 10)
)
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory G:/Other computers/Min dator/Skol/linköping UNI/WorkSpaceDirect/MouseBrainProjects/RmarkDown
## Info: Writing image file mmu05414.pathview.png
# AC
num_log2fc_AC_FM <- sign_PvalueAC_FM$log2FC
names(num_log2fc_AC_FM) <- Deg_AC_FM$ENTREZID

num_log2fc_AC_FR <- sign_PvalueAC_FR$log2FC
names(num_log2fc_AC_FR) <- Deg_AC_FR$ENTREZID

num_log2fc_AC_MR <- sign_PvalueAC_MR$log2FC
names(num_log2fc_AC_MR) <- Deg_AC_MR$ENTREZID


# MG
num_log2fc_MG_FM <- sign_PvalueMG_FM$log2FC
names(num_log2fc_MG_FM) <- Deg_MG_FM$ENTREZID

num_log2fc_MG_FR <- sign_PvalueMG_FR$log2FC
names(num_log2fc_MG_FR) <- Deg_MG_FR$ENTREZID

num_log2fc_MG_MR <- sign_PvalueMG_MR$log2FC
names(num_log2fc_MG_MR) <- Deg_MG_MR$ENTREZID

# AC
Sort_log2fc_AC_FM <- sort(num_log2fc_AC_FM, decreasing = TRUE)
Sort_log2fc_AC_FR <- sort(num_log2fc_AC_FR, decreasing = TRUE)
Sort_log2fc_AC_MR <- sort(num_log2fc_AC_MR, decreasing = TRUE)

# MG
Sort_log2fc_MG_FM <- sort(num_log2fc_MG_FM, decreasing = TRUE)
Sort_log2fc_MG_FR <- sort(num_log2fc_MG_FR, decreasing = TRUE)
Sort_log2fc_MG_MR <- sort(num_log2fc_MG_MR, decreasing = TRUE)



# AC
gseaKEGG_AC_FM <- gseKEGG(
  geneList = Sort_log2fc_AC_FM,
  organism = "mmu",
  pvalueCutoff = 1,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
gseaKEGG_AC_FR <- gseKEGG(
  geneList = Sort_log2fc_AC_FR,
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
gseaKEGG_AC_MR <- gseKEGG(
  geneList = Sort_log2fc_AC_MR,
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
# MG
gseaKEGG_MG_FM <- gseKEGG(
  geneList = Sort_log2fc_MG_FM,
  organism = "mmu",
  pvalueCutoff = 1,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
gseaKEGG_MG_FR <- gseKEGG(
  geneList = Sort_log2fc_MG_FR,
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
gseaKEGG_MG_MR <- gseKEGG(
  geneList = Sort_log2fc_MG_MR,
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "none"
) # Mus Musculus
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
View(gseaKEGG_AC_FM@result)
View(gseaKEGG_AC_FR@result)
View(gseaKEGG_AC_MR@result)

View(gseaKEGG_MG_FM@result)
View(gseaKEGG_MG_FR@result)
View(gseaKEGG_MG_MR@result)

print(gseaKEGG_AC_FM@result$ID)
##  [1] "mmu05012" "mmu04080" "mmu05208" "mmu05415" "mmu04714" "mmu05200"
##  [7] "mmu05010" "mmu05020" "mmu05014" "mmu01100" "mmu04723" "mmu05016"
## [13] "mmu05022"
print(gseaKEGG_AC_FR@result$ID)
##  [1] "mmu01212" "mmu01230" "mmu01200" "mmu04146" "mmu00020" "mmu04151"
##  [7] "mmu05412" "mmu05410" "mmu05030" "mmu04260" "mmu04919" "mmu00280"
## [13] "mmu05414" "mmu04080" "mmu04010"
print(gseaKEGG_AC_MR@result$ID)
##  [1] "mmu03010" "mmu00640" "mmu00230" "mmu04020" "mmu00071" "mmu00630"
##  [7] "mmu04080" "mmu04217" "mmu04146" "mmu00280" "mmu04310" "mmu04922"
## [13] "mmu01212" "mmu00620" "mmu01200" "mmu04814" "mmu04260"
print(gseaKEGG_MG_FM@result$ID)
##  [1] "mmu04024" "mmu04015" "mmu04020" "mmu04360" "mmu04723" "mmu04728"
##  [7] "mmu05200" "mmu01100" "mmu04010" "mmu05022" "mmu05016"
print(gseaKEGG_MG_FR@result$ID)
##  [1] "mmu04921" "mmu04142" "mmu05032" "mmu00230" "mmu04725" "mmu04310"
##  [7] "mmu04912" "mmu05230" "mmu04911" "mmu01100" "mmu05163" "mmu04931"
## [13] "mmu04360"
print(gseaKEGG_MG_MR@result$ID)
## [1] "mmu04921" "mmu04020" "mmu04912" "mmu04270" "mmu04062" "mmu04924" "mmu05163"
## [8] "mmu04611"
write_xlsx(gseaKEGG_AC_FM@result, "GSEA_AC_FM.xlsx")
write_xlsx(gseaKEGG_AC_FR@result, "GSEA_AC_FR.xlsx")
write_xlsx(gseaKEGG_AC_MR@result, "GSEA_AC_MR.xlsx")

write_xlsx(gseaKEGG_MG_FM@result, "GSEA_MG_FM.xlsx")
write_xlsx(gseaKEGG_MG_FR@result, "GSEA_MG_FR.xlsx")
write_xlsx(gseaKEGG_MG_MR@result, "GSEA_MG_MR.xlsx")

# Plot the GSEA plot for one of the enriched pathways observed in the enrichedKEGG object
# Gen Set ID mmu00010
# AC
gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05012", title = "GSEA enrichment score plot Astrocytes Front & Middle - Parkinson disease")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu04080", title = "GSEA enrichment score plot Astrocytes Front & Middle - Neuroactive ligand-receptor interaction")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05208", title = "GSEA enrichment score plot Astrocytes Front & Middle - Chemical carcinogenesis")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05415", title = "GSEA enrichment score plot Astrocytes Front & Middle - Diabetic cardiomyopathy")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu04714", title = "GSEA enrichment score plot Astrocytes Front & Middle - Thermogenesis")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05200", title = "GSEA enrichment score plot Astrocytes Front & Middle - Pathways in cancer")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05010", title = "GSEA enrichment score plot Astrocytes Front & Middle - Alzheimer disease")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05020", title = "GSEA enrichment score plot Astrocytes Front & Middle - Prion disease")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu01100", title = "GSEA enrichment score plot Astrocytes Front & Middle - Metabolic pathways")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05014", title = "GSEA enrichment score plot Astrocytes Front & Middle - Amyotrophic lateral sclerosis (ALS)")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu04723", title = "GSEA enrichment score plot Astrocytes Front & Middle - Retrograde endocannabinoid signaling")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05016", title = "GSEA enrichment score plot Astrocytes Front & Middle - Huntington's disease")

gseaplot(gseaKEGG_AC_FM, geneSetID = "mmu05022", title = "GSEA enrichment score plot Astrocytes Front & Middle - Systemic lupus erythematosus")

# AC
gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu01230", title = "GSEA enrichment score plot Astrocytes Front & Rare - Biosynthesis of amino acids")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu01212", title = "GSEA enrichment score plot Astrocytes Front & Rare - Fatty acid metabolism")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu01200", title = "GSEA enrichment score plot Astrocytes Front & Rare - Carbon metabolism")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu00020", title = "GSEA enrichment score plot Astrocytes Front & Rare - Citrate cycle (TCA cycle)")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04146", title = "GSEA enrichment score plot Astrocytes Front & Rare - Peroxisome")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04151", title = "GSEA enrichment score plot Astrocytes Front & Rare - PI3K-Akt signaling pathway")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu05412", title = "GSEA enrichment score plot Astrocytes Front & Rare - Arrhythmogenic right ventricular cardiomyopathy (ARVC)")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu05410", title = "GSEA enrichment score plot Astrocytes Front & Rare - Hypertrophic cardiomyopathy (HCM)")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04260", title = "GSEA enrichment score plot Astrocytes Front & Rare - Cardiac muscle contraction")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04010", title = "GSEA enrichment score plot Astrocytes Front & Rare - MAPK signaling pathway")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu05030", title = "GSEA enrichment score plot Astrocytes Front & Rare - Cocaine addiction")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu00280", title = "GSEA enrichment score plot Astrocytes Front & Rare - Valine, leucine, and isoleucine degradation")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu05414", title = "GSEA enrichment score plot Astrocytes Front & Rare - Dilated cardiomyopathy")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04919", title = "GSEA enrichment score plot Astrocytes Front & Rare - Thyroid hormone signaling pathway")

gseaplot(gseaKEGG_AC_FR, geneSetID = "mmu04921", title = "GSEA enrichment score plot Astrocytes Front & Rare - Oxytocin signaling pathway")

# AC
gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu03010", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Ribosome")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00230", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Purine metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00640", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Propanoate metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00071", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Fatty acid degradation")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04217", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Necroptosis")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00630", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Glyoxylate and dicarboxylate metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00280", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Valine, leucine, and isoleucine degradation")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04080", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Neuroactive ligand-receptor interaction")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04146", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Peroxisome")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04310", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Wnt signaling pathway")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04922", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Glucagon signaling pathway")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04814", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Renin secretion")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu00620", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Pyruvate metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu01212", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Fatty acid metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu01232", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Nucleotide metabolism")

gseaplot(gseaKEGG_AC_MR, geneSetID = "mmu04260", title = "GSEA enrichment score plot Astrocytes Middle & Rare - Cardiac muscle contraction")

# MG
gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04024", title = "GSEA enrichment score plot Microglia Front & Middle - cAMP signaling pathway")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04020", title = "GSEA enrichment score plot Microglia Front & Middle - Calcium signaling pathway")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04015", title = "GSEA enrichment score plot Microglia Front & Middle - Rap1 signaling pathway")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04360", title = "GSEA enrichment score plot Microglia Front & Middle - Axon guidance")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04723", title = "GSEA enrichment score plot Microglia Front & Middle - Retrograde endocannabinoid signaling")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04728", title = "GSEA enrichment score plot Microglia Front & Middle - Dopaminergic synapse")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu05200", title = "GSEA enrichment score plot Microglia Front & Middle - Pathways in cancer")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu01100", title = "GSEA enrichment score plot Microglia Front & Middle - Metabolic pathways")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu04010", title = "GSEA enrichment score plot Microglia Front & Middle - MAPK signaling pathway")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu05022", title = "GSEA enrichment score plot Microglia Front & Middle - Systemic lupus erythematosus")

gseaplot(gseaKEGG_MG_FM, geneSetID = "mmu05016", title = "GSEA enrichment score plot Microglia Front & Middle - Huntington's disease")

# MG
gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu04921", title = "GSEA enrichment score plot Microglia Front & Rare - Oxytocin signaling pathway")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu04142", title = "GSEA enrichment score plot Microglia Front & Rare - Lysosome")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu05032", title = "GSEA enrichment score plot Microglia Front & Rare - Morphine addiction")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu05230", title = "GSEA enrichment score plot Microglia Front & Rare - Central carbon metabolism in cancer")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu04912", title = "GSEA enrichment score plot Microglia Front & Rare - GnRH signaling pathway")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu01100", title = "GSEA enrichment score plot Microglia Front & Rare - Metabolic pathways")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu05163", title = "GSEA enrichment score plot Microglia Front & Rare - Human cytomegalovirus infection")

gseaplot(gseaKEGG_MG_FR, geneSetID = "mmu04020", title = "GSEA enrichment score plot Microglia Front & Rare - Calcium signaling pathway")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04921", title = "GSEA enrichment score plot Microglia Middle & Rare - Oxytocin signaling pathway")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04020", title = "GSEA enrichment score plot Microglia Middle & Rare - Calcium signaling pathway")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04912", title = "GSEA enrichment score plot Microglia Middle & Rare - GnRH signaling pathway")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04270", title = "GSEA enrichment score plot Microglia Middle & Rare - Vascular smooth muscle contraction")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04062", title = "GSEA enrichment score plot Microglia Middle & Rare - Chemokine signaling pathway")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu04924", title = "GSEA enrichment score plot Microglia Middle & Rare - Renin secretion")

gseaplot(gseaKEGG_MG_MR, geneSetID = "mmu05163", title = "GSEA enrichment score plot Microglia Middle & Rare - Human cytomegalovirus infection")

# Function to create and save a plot
save_gseaplot <- function(gsea_result, geneSetID, title, file_name) {
  plot <- gseaplot(gsea_result, geneSetID = geneSetID, title = title)
  ggsave(file_name, plot, width = 15, height = 8.5, dpi = 300)
}

# Define a list of pathway IDs and titles for AC_FM
AC_FM_pathways <- list(
  mmu05012 = "Parkinson disease",
  mmu04080 = "Neuroactive ligand-receptor interaction",
  mmu05208 = "Chemical carcinogenesis",
  mmu05415 = "Diabetic cardiomyopathy",
  mmu04714 = "Thermogenesis",
  mmu05200 = "Pathways in cancer",
  mmu05010 = "Alzheimer disease",
  mmu05020 = "Prion disease",
  mmu01100 = "Metabolic pathways",
  mmu05014 = "Amyotrophic lateral sclerosis (ALS)",
  mmu04723 = "Retrograde endocannabinoid signaling",
  mmu05016 = "Huntington's disease",
  mmu05022 = "Systemic lupus erythematosus"
)

# Loop through each pathway ID and title to save plots for AC_FM
for (id in names(AC_FM_pathways)) {
  title <- AC_FM_pathways[[id]]
  file_name <- sprintf("GSEA_AC_FM_%s.png", gsub(" ", "_", title))
  save_gseaplot(gseaKEGG_AC_FM, id, sprintf("Astrocytes Front & Middle - %s", title), file_name)
}

AC_FR_pathways <- list(
  mmu01230 = "Biosynthesis of amino acids",
  mmu01212 = "Fatty acid metabolism",
  mmu01200 = "Carbon metabolism",
  mmu00020 = "Citrate cycle (TCA cycle)",
  mmu04146 = "Peroxisome",
  mmu04151 = "PI3K-Akt signaling pathway",
  mmu05412 = "Arrhythmogenic right ventricular cardiomyopathy (ARVC)",
  mmu05410 = "Hypertrophic cardiomyopathy (HCM)",
  mmu04260 = "Cardiac muscle contraction",
  mmu04010 = "MAPK signaling pathway",
  mmu05030 = "Cocaine addiction",
  mmu00280 = "Valine, leucine, and isoleucine degradation",
  mmu05414 = "Dilated cardiomyopathy",
  mmu04919 = "Thyroid hormone signaling pathway",
  mmu04921 = "Oxytocin signaling pathway"
)

# Save plots for AC_FR
for (id in names(AC_FR_pathways)) {
  title <- AC_FR_pathways[[id]]
  file_name <- sprintf("GSEA_AC_FR_%s.png", gsub("[ /]", "_", title))
  save_gseaplot(gseaKEGG_AC_FR, id, sprintf("Astrocytes Front & Rare - %s", title), file_name)
}

AC_MR_pathways <- list(
  mmu03010 = "Ribosome",
  mmu00230 = "Purine metabolism",
  mmu00640 = "Propanoate metabolism",
  mmu00071 = "Fatty acid degradation",
  mmu04217 = "Necroptosis",
  mmu00630 = "Glyoxylate and dicarboxylate metabolism",
  mmu00280 = "Valine, leucine, and isoleucine degradation",
  mmu04080 = "Neuroactive ligand-receptor interaction",
  mmu04146 = "Peroxisome",
  mmu04310 = "Wnt signaling pathway",
  mmu04922 = "Glucagon signaling pathway",
  mmu04814 = "Renin secretion",
  mmu00620 = "Pyruvate metabolism",
  mmu01212 = "Fatty acid metabolism",
  mmu01232 = "Nucleotide metabolism",
  mmu04260 = "Cardiac muscle contraction"
)

for (id in names(AC_MR_pathways)) {
  title <- AC_MR_pathways[[id]]
  file_name <- sprintf("GSEA_AC_MR_%s.png", gsub("[ /]", "_", title))
  save_gseaplot(gseaKEGG_AC_MR, id, sprintf("Astrocytes Middle & Rare - %s", title), file_name)
}

MG_FM_pathways <- list(
  mmu04024 = "cAMP signaling pathway",
  mmu04020 = "Calcium signaling pathway",
  mmu04015 = "Rap1 signaling pathway",
  mmu04360 = "Axon guidance",
  mmu04723 = "Retrograde endocannabinoid signaling",
  mmu04728 = "Dopaminergic synapse",
  mmu05200 = "Pathways in cancer",
  mmu01100 = "Metabolic pathways",
  mmu04010 = "MAPK signaling pathway",
  mmu05022 = "Systemic lupus erythematosus",
  mmu05016 = "Huntington's disease"
)

for (id in names(MG_FM_pathways)) {
  title <- MG_FM_pathways[[id]]
file_name <- sprintf("GSEA_MG_FM_%s.png", gsub("[ /]", "_", title))
  save_gseaplot(gseaKEGG_MG_FM, id, sprintf("Microglia Front & Middle - %s", title), file_name)
}

MG_FR_pathways <- list(
  mmu04921 = "Oxytocin signaling pathway",
  mmu04142 = "Lysosome",
  mmu05032 = "Morphine addiction",
  mmu05230 = "Central carbon metabolism in cancer",
  mmu04912 = "GnRH signaling pathway",
  mmu01100 = "Metabolic pathways",
  mmu05163 = "Human cytomegalovirus infection",
  mmu04020 = "Calcium signaling pathway"
)

for (id in names(MG_FR_pathways)) {
  title <- MG_FR_pathways[[id]]
  file_name <- sprintf("GSEA_MG_FR_%s.png", gsub("[ /]", "_", title))
  save_gseaplot(gseaKEGG_MG_FR, id, sprintf("Microglia Front & Rare - %s", title), file_name)
}

MG_MR_pathways <- list(
  mmu04921 = "Oxytocin signaling pathway",
  mmu04020 = "Calcium signaling pathway",
  mmu04912 = "GnRH signaling pathway",
  mmu04270 = "Vascular smooth muscle contraction",
  mmu04062 = "Chemokine signaling pathway",
  mmu04924 = "Renin secretion",
  mmu05163 = "Human cytomegalovirus infection"
)

for (id in names(MG_MR_pathways)) {
  title <- MG_MR_pathways[[id]]
  file_name <- sprintf("GSEA_MG_MR_%s.png", gsub("[ /]", "_", title))
  save_gseaplot(gseaKEGG_MG_MR, id, sprintf("Microglia Middle & Rare - %s", title), file_name)
}


## Save dotplots from KEGG enrichment plot
ggsave(
  filename = "dot_plot_AC_FM.png",
  plot = dot_plot_AC_FM,
  width = 10,
  height = 12,
  dpi = 300
)
ggsave(
  filename = "dot_plot_AC_FR.png",
  plot = dot_plot_AC_FR,
  width = 10,
  height = 12,
  dpi = 300
)
ggsave(
  filename = "dot_plot_AC_MR.png",
  plot = dot_plot_AC_MR,
  width = 10,
  height = 12,
  dpi = 300
)

ggsave(
  filename = "dot_plot_MG_FM.png",
  plot = dot_plot_MG_FM,
  width = 10,
  height = 12,
  dpi = 300
)
ggsave(
  filename = "dot_plot_MG_FR.png",
  plot = dot_plot_MG_FR,
  width = 10,
  height = 12,
  dpi = 300
)
ggsave(
  filename = "dot_plot_MG_MR.png",
  plot = dot_plot_MG_MR,
  width = 10,
  height = 12,
  dpi = 300
)
# read the data file with all samples
rawCounts <-
  read.table("raw_counts_cpm_after_norm_reorganised_220722.txt", header = TRUE)


# Load sample information data
sampleInfo <- read.table("Sample-info_220722.txt", header = TRUE)


# Load cell type marker genes data from an Excel file
cellType_markerGenes <- read_excel("Celltype_markergenes.xlsx")



# USing biomaRt annotation  method.

# WARNING THIS FUNCTION DOES NOT WORK we USE OLD LOADED DATASET FOR GENESYMBOLS Y THIS FUNCTION 2023-12-27.
# ensembl <- useMart("ensembl")
# 
# listDatasets(ensembl)
# 
# ensembl_mouse <- useDataset("mmusculus_gene_ensembl", mart = ensembl)
# 
# mouse_gene_ids <- rawCounts$ensembl_id
# THIS FUNCTION getBM() SHOULD WORK BUT HAS A TIME TOKEN LIMIT ERROR ON THE collect() function THAT NEEDS DEBUGGING.
# mouse_gene_info <-
#   getBM(
#     attributes = c(
#       "ensembl_gene_id",
#       "external_gene_name",
#       "chromosome_name",
#       "start_position",
#       "end_position",
#       "gene_biotype",
#       "strand"
#     ),
#     filters = "ensembl_gene_id",
#     values = mouse_gene_ids,
#     mart = ensembl_mouse
#   )
mouse_gene_info <- readRDS("mouse_gene_info.rds")
# Changing name of column too merge the values with a matching column.
colnames(mouse_gene_info)[c(1)] <- c("ensembl_id")
# Merging the two dataframes into one dataframe leaving out one
rawCounts <- merge(rawCounts, mouse_gene_info, c(1))

rawCounts <- rawCounts[,-c(1, 38:42)]

colnames(rawCounts)[c(36)] <-  c("GeneNames")

# keep only the first occurrence of each duplicated gene name
rawCounts <- rawCounts[!duplicated(rawCounts$GeneNames),]

# add gene name to  row names 
row.names(rawCounts) <-  rawCounts$GeneNames
rawCounts <- rawCounts[,-36]

# Keep only rows where all values are above 1
#rawCounts <- rawCounts[apply(rawCounts, 1, function(x) all(x > 1)), ]


# Filter the data using logical indexing
# rawCounts <- rawCounts[rowSums(rawCounts >= 40) > 0, ]

# separating the columns by region
# into  separated dataframe object for the rawCounts and sampleInfo
rawCounts_F <- rawCounts[, grep("_F_", colnames(rawCounts))]
rawCounts_M <- rawCounts[, grep("_M_", colnames(rawCounts))]
rawCounts_R <- rawCounts[, grep("_R_", colnames(rawCounts))]


sampleInfo_F <- sampleInfo[grep("_F_", sampleInfo$sample),]
sampleInfo_M <- sampleInfo[grep("_M_", sampleInfo$sample),]
sampleInfo_R <- sampleInfo[grep("_R_", sampleInfo$sample),]



# order Column names 
rawCounts <- rawCounts[order(colnames(rawCounts))]
sampleInfo <- sampleInfo[order(sampleInfo$sample), ]

rawCounts_F <- rawCounts_F[order(colnames(rawCounts_F))]
rawCounts_M <- rawCounts_M[order(colnames(rawCounts_M))]

sampleInfo_F <- sampleInfo_F[order(sampleInfo_F$sample), ]
sampleInfo_M <- sampleInfo_M[order(sampleInfo_M$sample), ]

rawCounts_R <- rawCounts_R[order(colnames(rawCounts_R))]
sampleInfo_R <- sampleInfo_R[order(sampleInfo_R$sample), ]


# Log the counts
logCounts <- log2(rawCounts + 1)

logCounts_F <- log2(rawCounts_F + 1)
logCounts_M <- log2(rawCounts_M + 1)
logCounts_R <- log2(rawCounts_R + 1)

###

# Adding marker genes before z-score
dfCounts <- na.omit(logCounts[cellType_markerGenes$gene_symbol, ])
dfCounts_F <- na.omit(logCounts_F[cellType_markerGenes$gene_symbol, ])
dfCounts_M <- na.omit(logCounts_M[cellType_markerGenes$gene_symbol, ])
dfCounts_R <- na.omit(logCounts_R[cellType_markerGenes$gene_symbol, ])

# making function for calculating z score since scale function is not ideal
calc_zscore <- function(df) {
  result <- (df - mean(df)) / sd(df)
  return(result)
}


# calculating the z-score
Z_dfCounts <- apply(dfCounts, 1, calc_zscore)
Z_dfCounts_F <- apply(dfCounts_F, 1, calc_zscore)
Z_dfCounts_M <- apply(dfCounts_M, 1, calc_zscore)
Z_dfCounts_R <- apply(dfCounts_R, 1, calc_zscore)


# Transposition original data frame
Z_dfCounts <- t(Z_dfCounts)
Z_dfCounts_F <- t(Z_dfCounts_F)
Z_dfCounts_M <- t(Z_dfCounts_M)
Z_dfCounts_R <- t(Z_dfCounts_R)

# Separate the columns into RT and TR
RT_columns <- grep("_RT", colnames(Z_dfCounts), value = TRUE)
TR_columns <- grep("_TR", colnames(Z_dfCounts), value = TRUE)

RT_columns_F <- grep("_RT", colnames(Z_dfCounts_F), value = TRUE)
TR_columns_F <- grep("_TR", colnames(Z_dfCounts_F), value = TRUE)

RT_columns_M <- grep("_RT", colnames(Z_dfCounts_M), value = TRUE)
TR_columns_M <- grep("_TR", colnames(Z_dfCounts_M), value = TRUE)


RT_columns_R <- grep("_RT", colnames(Z_dfCounts_R), value = TRUE)
TR_columns_R <- grep("_TR", colnames(Z_dfCounts_R), value = TRUE)


# Combine the columns in the desired order
ordered_columns <- c(RT_columns, TR_columns)
ordered_columns_F <- c(RT_columns_F, TR_columns_F)
ordered_columns_M <- c(RT_columns_M, TR_columns_M)
ordered_columns_R <- c(RT_columns_R, TR_columns_R)

# Reorder the data frame based on these columns
Z_dfCounts_ordered <- Z_dfCounts[, ordered_columns]
Z_dfCounts_F_ordered <-  Z_dfCounts_F[, ordered_columns_F]
Z_dfCounts_M_ordered <-  Z_dfCounts_M[, ordered_columns_M]
Z_dfCounts_R_ordered <-  Z_dfCounts_R[, ordered_columns_R]

# Create a new data frame with gene_symbol and cell_type for row annotation
row_annotation <- data.frame(
  CellType = cellType_markerGenes$cell_type,
  row.names = cellType_markerGenes$gene_symbol
)

col_annotation <- data.frame(Cells = sampleInfo$Cell_type,
                             Region = sampleInfo$Region,
                             Condition = sampleInfo$Condition,
row.names = sampleInfo$sample)

# Define a custom color scheme for cell types
cell_type_colors <- c("blue", "red", "green", "purple", "orange")
names(cell_type_colors) <- c("astro", "GABA", "Glut", "micro", "oligo")

cellType_colors <- c("#fbceb1", "#00ffff")
names(cellType_colors) <- c("Astrocytes", "Microglia")

region_colors <- c("#f0e442", "#d55e00", "#009e73")
names(region_colors) <- c("Front", "Middle", "Rear")

condition_color <- c("#FF00CC", "black")
names(condition_color) <- c("RiboTag", "TotalRNA")

# Generate the custom annotation dataframe
annotation_colors <- list(CellType = cell_type_colors,
                          Cells = cellType_colors,
                          Region = region_colors,
                          Condition = condition_color)

# Create the heatmap
FMR <- pheatmap(
  Z_dfCounts_ordered,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  cluster_cols = F,
  cluster_rows = F,
  annotation_row = row_annotation,
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  main = "Z-score of marker genes across cells Microglia & Astrocytes for all regions",
  fontsize = 8,
  show_colnames = F,
  cellwidth = 12,
  cellheight = 7
)

F1 <- pheatmap(
  Z_dfCounts_F_ordered,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  cluster_cols = F,
  cluster_rows = F,
  annotation_row = row_annotation,
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  main = "Z-score of marker genes across cells Microglia & Astrocytes for front region",
  fontsize = 8,
  cellwidth = 12,
  cellheight = 7
)

M1 <- pheatmap(
  Z_dfCounts_M_ordered,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  cluster_cols = F,
  cluster_rows = F,
  annotation_row = row_annotation,
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  main = "Z-score of marker genes across cells Microglia & Astrocytes for middle region",
  fontsize = 8,
  show_colnames = F,
  cellwidth = 12,
  cellheight = 7
)

R1 <- pheatmap(
  Z_dfCounts_R_ordered,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  cluster_cols = F,
  cluster_rows = F,
  annotation_row = row_annotation,
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  main = "Z-score of marker genes across cells Microglia & Astrocytes for rear region",
  fontsize = 8,
  show_colnames = F,
  cellwidth = 12,
  cellheight = 7
)

ggsave(
  filename = "3Z-score_HeatmapMarkerFrontMiddleRear.png",
  plot = FMR,
  width = 13,
  height = 13
)

ggsave(
  filename = "3Z-score_HeatmapMarkerFront.png",
  plot = F1,
  width = 13,
  height = 13
)
ggsave(
  filename = "3Z-score_HeatmapMarkerMiddle.png",
  plot = M1,
  width = 13,
  height = 13
)
ggsave(
  filename = "3Z-score_HeatmapMarkerRear.png",
  plot = R1,
  width = 13,
  height = 13
)
# read the data file with all samples
rawCounts <-
  read.table("raw_counts_cpm_after_norm_reorganised_220722.txt", header = TRUE)


# Load sample information data
sampleInfo <- read.table("Sample-info_220722.txt", header = TRUE)

# Load cell type marker genes data from an Excel file
ribo_prot <- read_xlsx("ribo_prot.xlsx")

# BioMart is not working properly.
# ensembl <- useMart("ensembl")
# 
# listDatasets(ensembl)
# 
# ensembl_mouse <-
#   useDataset("mmusculus_gene_ensembl", mart = ensembl)
# 
# mouse_gene_ids <- rawCounts$ensembl_id

# THIS FUNCTION getBM() SHOULD WORK BUT HAS A TIME TOKEN LIMIT ERROR ON THE collect() function THAT NEEDS DEBUGGING.
# mouse_gene_info <-
#   getBM(
#     attributes = c(
#       "ensembl_gene_id",
#       "external_gene_name",
#       "chromosome_name",
#       "start_position",
#       "end_position",
#       "gene_biotype",
#       "strand"
#     ),
#     filters = "ensembl_gene_id",
#     values = mouse_gene_ids,
#     mart = ensembl_mouse
#   )

# Changing name of column too merge the values with a matching column.
colnames(mouse_gene_info)[c(1)] <- c("ensembl_id")
# Merging the two dataframes into one dataframe leaving out one
rawCounts <- merge(rawCounts, mouse_gene_info, c(1))

rawCounts <- rawCounts[, -c(1, 38:42)]

colnames(rawCounts)[c(36)] <-  c("GeneNames")

# keep only the first occurrence of each duplicated gene name
rawCounts <- rawCounts[!duplicated(rawCounts$GeneNames), ]

# add gene name to  row names
row.names(rawCounts) <-  rawCounts$GeneNames
rawCounts <- rawCounts[, -36]

# # Keep only rows where all values are above 1
# rawCounts <- rawCounts[apply(rawCounts, 1, function(x) all(x > 1)), ]

# Filter the data using logical indexing
# rawCounts <- rawCounts[rowSums(rawCounts >= 40) > 0, ]

# separating the columns if ribotag and totalRNA  
# into  separated dataframe object for the rawCounts and sampleInfo
rawCounts_F <- rawCounts[, grep("_F_", colnames(rawCounts))]
rawCounts_M <- rawCounts[, grep("_M_", colnames(rawCounts))]
rawCounts_R <- rawCounts[, grep("_R_", colnames(rawCounts))]


sampleInfo_F <- sampleInfo[grep("_F_", sampleInfo$sample),]
sampleInfo_M <- sampleInfo[grep("_M_", sampleInfo$sample),]
sampleInfo_R <- sampleInfo[grep("_R_", sampleInfo$sample),]

# order Column names 
rawCounts <- rawCounts[order(colnames(rawCounts))]
sampleInfo <- sampleInfo[order(sampleInfo$sample), ]

rawCounts_F <- rawCounts_F[order(colnames(rawCounts_F))]
rawCounts_M <- rawCounts_M[order(colnames(rawCounts_M))]

sampleInfo_F <- sampleInfo_F[order(sampleInfo_F$sample), ]
sampleInfo_M <- sampleInfo_M[order(sampleInfo_M$sample), ]

rawCounts_R <- rawCounts_R[order(colnames(rawCounts_R))]
sampleInfo_R <- sampleInfo_R[order(sampleInfo_R$sample), ]

# Log the counts
logCounts <- log2(rawCounts + 1)

logCounts_F <- log2(rawCounts_F + 1)
logCounts_M <- log2(rawCounts_M + 1)
logCounts_R <- log2(rawCounts_R + 1)


# Adding marker genes before z-score
dfCounts_D <- na.omit(logCounts[ribo_prot$Symbols, ])
dfCounts_F_D <- na.omit(logCounts_F[ribo_prot$Symbols, ])
dfCounts_M_D <- na.omit(logCounts_M[ribo_prot$Symbols, ])
dfCounts_R_D <- na.omit(logCounts_R[ribo_prot$Symbols, ])

# making function for calculating z score since scale function is not ideal
calc_zscore <- function(df) {
  result <- (df - mean(df)) / sd(df)
  return(result)
}


# calculating the z-score
Z_dfCounts_D <- apply(dfCounts_D, 1, calc_zscore)
Z_dfCounts_F_D <- apply(dfCounts_F_D, 1, calc_zscore)
Z_dfCounts_M_D <- apply(dfCounts_M_D, 1, calc_zscore)
Z_dfCounts_R_D <- apply(dfCounts_R_D, 1, calc_zscore)



# # keep only the first occurrence of each duplicated gene name
# ribo_prot <- ribo_prot[!duplicated(ribo_prot$Symbols),]
# # Create a new data frame with gene_symbol and cell_type for row annotation
# row_annotation_D <- data.frame(
#   Cromosome = ribo_prot$`Chromosome Number`,
#   row.names = ribo_prot$Symbols
# )
# 
# # Define a custom color scheme for cell types
# 
# # Generate the custom annotation dataframe
# annotation_D <- data.frame(row_annotation_D)
# rownames(annotation_D) <- rownames(row_annotation_D)



# Your original data frame
Z_dfCounts_D <- t(Z_dfCounts_D)
Z_dfCounts_F_D <- t(Z_dfCounts_F_D)
Z_dfCounts_M_D <- t(Z_dfCounts_M_D)
Z_dfCounts_R_D <- t(Z_dfCounts_R_D)

# Separate the columns into RT and TR
RT_columns_D <- grep("_RT", colnames(Z_dfCounts_D), value = TRUE)
TR_columns_D <- grep("_TR", colnames(Z_dfCounts_D), value = TRUE)

RT_columns_F_D <- grep("_RT", colnames(Z_dfCounts_F_D), value = TRUE)
TR_columns_F_D <- grep("_TR", colnames(Z_dfCounts_F_D), value = TRUE)

RT_columns_M_D <- grep("_RT", colnames(Z_dfCounts_M_D), value = TRUE)
TR_columns_M_D <- grep("_TR", colnames(Z_dfCounts_M_D), value = TRUE)


RT_columns_R_D <- grep("_RT", colnames(Z_dfCounts_R_D), value = TRUE)
TR_columns_R_D <- grep("_TR", colnames(Z_dfCounts_R_D), value = TRUE)


# Combine the columns in the desired order
ordered_columns_D <- c(RT_columns_D, TR_columns_D)
ordered_columns_F_D <- c(RT_columns_F_D, TR_columns_F_D)
ordered_columns_M_D <- c(RT_columns_M_D, TR_columns_M_D)
ordered_columns_R_D <- c(RT_columns_R_D, TR_columns_R_D)

# Reorder the data frame based on these columns
Z_dfCounts_ordered_D <- Z_dfCounts_D[, ordered_columns_D]
Z_dfCounts_F_ordered_D <-  Z_dfCounts_F_D[, ordered_columns_F_D]
Z_dfCounts_M_ordered_D <-  Z_dfCounts_M_D[, ordered_columns_M_D]
Z_dfCounts_R_ordered_D <-  Z_dfCounts_R_D[, ordered_columns_R_D]


# Create the heatmap
FMR1 <- pheatmap(
  Z_dfCounts_ordered_D,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  cluster_cols = F,
  cluster_rows = F,
  main = "Z-score of rivosomal protein encoded genes across cells Microglia & Astrocytes for all regions",
  fontsize = 8,
  show_colnames = F,
  cellwidth = 12,
  cellheight = 7
)

F2 <- pheatmap(
  Z_dfCounts_F_ordered_D,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  cluster_cols = F,
  cluster_rows = F,
  main = "Z-score of ribosomal protein encoded genes across cells Microglia & Astrocytes for front region",
  fontsize = 8,
  show_colnames = F,
  cellwidth = 12,
  cellheight = 7
)

M2 <- pheatmap(
  Z_dfCounts_M_ordered_D,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  show_colnames = F,
  cluster_cols = F,
  cluster_rows = F,
  main = "Z-score of ribosomal protein encoded genes across cells Microglia & Astrocytes for middle region",
  fontsize = 8,
  cellwidth = 12,
  cellheight = 7
)

R2 <- pheatmap(
  Z_dfCounts_R_ordered_D,
  color = colorRampPalette(c("yellow", "white", "purple"))(40),
  annotation_col = col_annotation,
  annotation_colors = annotation_colors,
  show_colnames = F,
  cluster_cols = F,
  cluster_rows = F,
  main = "Z-score of ribosomal protein encoded genes across cells Microglia & Astrocytes for rear region",
  fontsize = 8,
  cellwidth = 12,
  cellheight = 7
)

ggsave(
  filename = "4Z-score_HeatmapRiboProtFrontMiddleRear.png",
  plot = FMR1,
  width = 13,
  height = 13
)

ggsave(
  filename = "3Z-score_HeatmapRiboProtFront.png",
  plot = F2,
  width = 13,
  height = 13
)

ggsave(
  filename = "3Z-score_HeatmapRiboProtMiddle.png",
  plot = M2,
  width = 13,
  height = 13
)

ggsave(
  filename = "3Z-score_HeatmapRiboProtRear.png",
  plot = R2,
  width = 13,
  height = 13
)
# Attaching data to object in the environment
# Sample Info
sampleInfo <- read.table("Sample-info_220722.txt", header = T)

# Celltype info
Celltype_markergenes <- read_excel("Celltype_markergenes.xlsx")

# Counts per million after normalization
rawCounts <-
  read.table(
    "raw_counts_cpm_after_norm_reorganised_220722.txt",
    header = T,
    row.names = 1
  )



# Set a seed to produce reproducible result
set.seed(16)

#Create DGE through DGEList() funtion
dgList <- DGEList(counts = rawCounts, genes = rownames(rawCounts))

# Add the replication column to the markergenes dataframe
# Building groups for the replication
replication <-
  factor(
    c(
      'rep1',
      'rep1',
      'rep1',
      'rep2',
      'rep2',
      'rep2',
      'rep3',
      'rep3',
      'rep2',
      'rep2',
      'rep2',
      'rep3',
      'rep3',
      'rep3',
      'rep1',
      'rep1',
      'rep1',
      'rep1',
      'rep1',
      'rep1',
      'rep2',
      'rep2',
      'rep2',
      'rep3',
      'rep3',
      'rep3',
      'rep2',
      'rep2',
      'rep2',
      'rep3',
      'rep3',
      'rep3',
      'rep1',
      'rep1',
      'rep1'
    )
  )

replication <- data.frame(replication)
sampleInfo$replication <- replication$replication



# Density plots of CPM per sample of data, filtered, and normalized log2
plotDensity(
  cpm(rawCounts, log = T),
  col = "red",
  xlab = "CPM of log2",
  ylab = "Density",
  main = "Normalized CPM log2 Data"
)

# Extract the Counts Matrix from the DGEList
Count_dgList <- dgList$counts

# Log the counts
logCounts_dgList <- log2(Count_dgList + 1)

# Filter out low-expressed genes
maxTPM <- apply(logCounts_dgList, 1, max)
minTPM <- apply(logCounts_dgList, 1, min)

logCounts_dgList <-
  logCounts_dgList[(maxTPM >= 2) & ((maxTPM - minTPM) >= 2),]

# Keep the top 500 genes based on variance
keepVariance <-
  order(apply(logCounts_dgList, 1, var), decreasing = T)[1:500]
topVariance_counts <- logCounts_dgList[keepVariance,]

# apply PCA on the transposed data
outputPCA <- prcomp(t(topVariance_counts))

# Calculate the variance of each PC
variancePC <- outputPCA$sdev ^ 2 / sum(outputPCA$sdev ^ 2)
variancePC <- round(variancePC, digits = 2)

variancePC_df <- data.frame(PC = paste0("PC", 1:10),
                            variancePC = variancePC[1:10])

# Create the scree plot
ggplot(variancePC_df, aes(x = PC, y = variancePC)) +
  geom_col() + geom_bar(stat = "identity", fill = "royalblue4") +
  labs(title = "Scree Plot of Principal Components") +
  ylab("Explained variance") + ylim(0, 1)

# Extract the PCs 1 and 2
outputPCA <- as.data.frame(outputPCA$x[, 1:2])

# Add DGEList varaibles to the PCA output
outputPCA[, 3:7] <- sampleInfo[, 1:5]



# Function to assign group based on pattern matching
assign_group <- function(sample, patterns) {
  for (pattern in patterns) {
    if (grepl(pattern, sample)) {
      return(pattern)
    }
  }
  return("Other")  # Default group for samples that don't match any pattern
}

# List of patterns
patterns <- c(
  "AC_F_RT", "AC_M_RT", "AC_R_RT", 
  "MG_F_RT", "MG_M_RT", "MG_R_RT",
  "AC_F_TR", "AC_M_TR", "AC_R_TR", 
  "MG_F_TR", "MG_M_TR", "MG_R_TR"
)


# Apply the function to each sample to create the group column
sampleInfo$Sample <- sapply(sampleInfo$sample, assign_group, patterns)

# Map this new grouping to the PCA output
outputPCA$Sample <- sampleInfo$Sample[match(row.names(outputPCA), sampleInfo$sample)]

# Define colors for each group
group_colors <- c(
  "AC_F_RT" = "green",
  "AC_M_RT" = "yellow",
  "AC_R_RT" = "red",
  "MG_F_RT" = "blue",
  "MG_M_RT" = "pink",
  "MG_R_RT" = "black",
  "AC_F_TR" = "limegreen",
  "AC_M_TR" = "orange",
  "AC_R_TR" = "darkred",
  "MG_F_TR" = "darkblue",
  "MG_M_TR" = "purple",
  "MG_R_TR" = "gray"
)

# Update the ggplot call for PCA plot
ggplot(
  outputPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = replication  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("PCA Plot of Replication by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  outputPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Condition  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("PCA Plot of Condition by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  outputPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Region  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("PCA Plot of Regions by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  outputPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Cell_type  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("PCA Plot of Celltype by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Create a design matrix with our variable of interest
designMatrix <- model.matrix( ~ as.factor(Region), data = sampleInfo)

# Apply the Combat correction of the replicates
outputComBat <- ComBat(
  dat = topVariance_counts,
  batch = as.factor(sampleInfo$replication),
  mod = designMatrix,
  par.prior = F
)
## Found3batches
## Adjusting for2covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding nonparametric adjustments
## Adjusting the Data
# Set negative values to 0
outputComBat[outputComBat < 0] <- 0

# Apply again PCA
correctedPCA <- prcomp(t(outputComBat))


corrected_varExplained <-
  correctedPCA$sdev ^ 2 / sum(correctedPCA$sdev ^ 2)
corrected_varExplained <- round(corrected_varExplained, digits = 2)

correctedPCA <- as.data.frame(correctedPCA$x[, 1:2])

correctedPCA[, 3:7] <- sampleInfo[, 1:5]

# Map this new grouping to the PCA output
correctedPCA$Sample <- sampleInfo$Sample[match(row.names(correctedPCA), sampleInfo$sample)]

# Update the ggplot call for PCA plot
ggplot(
  correctedPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = replication  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("Combat Corrected PCA Plot of Replication by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  correctedPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Region  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("Combat Corrected PCA Plot of Region by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  correctedPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Condition  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("Combat Corrected PCA Plot of Condition by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

# Update the ggplot call for PCA plot
ggplot(
  correctedPCA,
  aes(
    x = PC1,
    y = PC2,
    color = Sample,  # Use the new grouping variable for color
    shape = Cell_type  # Optionally, you can still shape by original replication
  )
) +
  ggtitle("Combat Corrected PCA Plot of Cell_type by Sample Groups") +
  geom_point(size = 3, alpha = 1) +
  scale_color_manual(values = group_colors, 
                     limits = patterns) +  # Set the order of colors in the legend
  theme(text = element_text(size = 3)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  xlab(paste("PC1: ", variancePC[1], sep = "")) +
  ylab(paste("PC2: ", variancePC[2], sep = ""))

#
sign_log2FC_AC_FM <- sign_PvalueAC_FM[, 2:3]
rownames(sign_log2FC_AC_FM) <- sign_log2FC_AC_FM$GeneNames
sign_log2FC_AC_FM <- sign_log2FC_AC_FM[, -1]
#
sign_log2FC_AC_FR <- sign_PvalueAC_FR[, 2:3]
rownames(sign_log2FC_AC_FR) <- sign_log2FC_AC_FR$GeneNames
sign_log2FC_AC_FR <- sign_log2FC_AC_FR[, -1]
#
sign_log2FC_AC_MR <- sign_PvalueAC_MR[, 2:3]
rownames(sign_log2FC_AC_MR) <- sign_log2FC_AC_MR$GeneNames
sign_log2FC_AC_MR <- sign_log2FC_AC_MR[, -1]

# Ensure that 'sign_log2FC_AC_FM' is a data frame
sign_log2FC_AC_FM <- as.data.frame(sign_log2FC_AC_FM)
sign_log2FC_AC_FR <- as.data.frame(sign_log2FC_AC_FR)
sign_log2FC_AC_MR <- as.data.frame(sign_log2FC_AC_MR)



#
sign_log2FC_MG_FM <- sign_PvalueMG_FM[, 2:3]
rownames(sign_log2FC_MG_FM) <- sign_log2FC_MG_FM$GeneNames
sign_log2FC_MG_FM <- sign_log2FC_MG_FM[, -1]
#
sign_log2FC_MG_FR <- sign_PvalueMG_FR[, 2:3]
rownames(sign_log2FC_MG_FR) <- sign_log2FC_MG_FR$GeneNames
sign_log2FC_MG_FR <- sign_log2FC_MG_FR[, -1]
#
sign_log2FC_MG_MR <- sign_PvalueMG_MR[, 2:3]
rownames(sign_log2FC_MG_MR) <- sign_log2FC_MG_MR$GeneNames
sign_log2FC_MG_MR <- sign_log2FC_MG_MR[, -1]

# Ensure that 'sign_log2FC_MG_FM' is a data frame
sign_log2FC_MG_FM <- as.data.frame(sign_log2FC_MG_FM)
sign_log2FC_MG_FR <- as.data.frame(sign_log2FC_MG_FR)
sign_log2FC_MG_MR <- as.data.frame(sign_log2FC_MG_MR)


# Now, 'sign_log2FC_MG_FM', 'sign_log2FC_MG_FR', 'sign_log2FC_MG_MR'
# should be a character vector of row names
sign_log2FC_MG_FM <- rownames(sign_log2FC_MG_FM)
sign_log2FC_MG_FR <- rownames(sign_log2FC_MG_FR)
sign_log2FC_MG_MR <- rownames(sign_log2FC_MG_MR)

head(sign_log2FC_MG_FM)
## [1] "1" "2" "3" "4" "5" "6"
head(sign_log2FC_MG_FR)
## [1] "1" "2" "3" "4" "5" "6"
head(sign_log2FC_MG_MR)
## [1] "1" "2" "3" "4" "5" "6"
################### GENE SETS #############

# Create gene sets
gene_set_AC_FM <- as.character(sign_PvalueAC_FM$GeneNames)
gene_set_AC_FR <- as.character(sign_PvalueAC_FR$GeneNames)
gene_set_AC_MR <- as.character(sign_PvalueAC_MR$GeneNames)

gene_set_MG_FM <- as.character(sign_PvalueMG_FM$GeneNames)
gene_set_MG_FR <- as.character(sign_PvalueMG_FR$GeneNames)
gene_set_MG_MR <- as.character(sign_PvalueMG_MR$GeneNames)


# AC_FMR
gene_sets_AC_FMR <-
  list(
    Front_vs_Middle = gene_set_AC_FM,
    Front_vs_Rear = gene_set_AC_FR,
    Middle_vs_Rear = gene_set_AC_MR
  )
plot(
  euler(gene_sets_AC_FMR),
  quantities = TRUE,
  fills = c("red", "green", "yellow")
)

grid.text(
  "Venn Diagram of DGE's for Astrocytes across Front, Middle, and Rear Regions",
  x = 0.5,
  y = 0.95
)

# Find common genes
common_genes_AC <-
  Reduce(intersect,
         list(gene_set_AC_FM, gene_set_AC_FR, gene_set_AC_MR))

# Print common genes
print(common_genes_AC)
##  [1] "Kif26b"   "Nmnat2"   "Ier5"     "Cdc42bpa" "Klhdc8a"  "Dcaf6"   
##  [7] "Bok"      "Igfbp5"   "Pdrg1"    "Tlk1"     "Dbndd2"   "Bmp7"    
## [13] "Slc4a10"  "G6pdx"    "Dmd"      "Tspan7"   "Mrpl24"   "Phgdh"   
## [19] "Magi3"    "Hcn3"     "Gfm1"     "S100a1"   "Disp3"    "Gabrd"   
## [25] "Rimkla"   "Ankrd6"   "Car8"     "Rell1"    "Chst12"   "Slc29a4" 
## [31] "Mgll"     "Ptms"     "Tmem176a" "Nr2c2"    "Lrig1"    "Tax1bp1" 
## [37] "Grm7"     "Nat8f1"   "Hrh1"     "Dennd2a"  "Eif3c"    "Inpp5f"  
## [43] "Cyth2"    "Hs3st2"   "Gpi1"     "Kcnc2"    "Cox4i1"   "Ank1"    
## [49] "Pgbd5"    "Kctd12"   "Rap2a"    "Ephx2"    "Lgi3"     "Sall2"   
## [55] "Ogdhl"    "Cmtm5"    "Elovl5"   "Sh3bgrl2" "Chst2"    "Ift46"   
## [61] "Fxyd6"    "Smad3"    "Cspg5"    "Mindy2"   "Gria4"    "Timm22"  
## [67] "Tenm2"    "Gria1"    "Car4"     "Usp32"    "Grin2c"   "Slc13a5" 
## [73] "Actr2"    "Pnpo"     "Hmgcr"    "Ttll5"    "Dio2"     "Cep128"  
## [79] "Snx13"    "Daam1"    "Dbpht2"   "Tunar"    "Dcaf5"    "Celsr1"  
## [85] "Aco2"     "Pmm1"     "Sod1"     "Skic2"    "Ddx39b"   "Nudt3"   
## [91] "Glo1"     "Gfra1"    "Nanos1"   "Rorb"
# Save common_genes to a file
write.csv(common_genes_AC, "common_genes_AC.csv")

# MG_FMR
gene_sets_MG_FMR <-
  list(
    Front_vs_Middle = gene_set_MG_FM,
    Front_vs_Rear = gene_set_MG_FR,
    Middle_vs_Rear = gene_set_MG_MR
  )
plot(
  euler(gene_sets_MG_FMR),
  quantities = TRUE,
  fills = c("red", "green", "yellow")
)
grid.text(
  "Venn Diagram of DGE's for Microglia across Front, Middle, and Rear Regions",
  x = 0.5,
  y = 0.95
)

# Find common genes
common_genes_MG <-
  Reduce(intersect,
         list(gene_set_MG_FM, gene_set_MG_FR, gene_set_MG_MR))

# Print common genes
print(common_genes_MG)
##  [1] "Rgs8"          "Ier5"          "C130074G19Rik" "Klhdc8a"      
##  [5] "Syt2"          "Acadl"         "Mmp24"         "Rapgef4"      
##  [9] "Etl4"          "Slc25a25"      "Plcb1"         "Gpsm1"        
## [13] "Pkp4"          "Gria3"         "Gria2"         "Rpl22l1"      
## [17] "Slc29a4"       "Fscn1"         "Tspan9"        "Slc8a2"       
## [21] "Cacng3"        "Atp2b1"        "Syde1"         "Ptbp1"        
## [25] "Gng7"          "Lzts1"         "Ank1"          "Dbndd1"       
## [29] "Cdhr1"         "Fbxo9"         "Arhgap32"      "Nrgn"         
## [33] "Tmem108"       "Arpp21"        "Cabp7"         "Grin2c"       
## [37] "Pde1b"         "Efr3a"         "Dexi"          "Hdgfl2"       
## [41] "Mtcl1"         "Sema4g"        "Kcnip2"        "Trpm3"
# Save common_genes to a file
write.csv(common_genes_MG, "common_genes_MG.csv")

## FM
# Create a named list of gene sets
gene_sets_AC_MG_FM <-
  list(Astrocyte_Front_vs_Middle = gene_set_AC_FM,
       Microglia_Front_vs_Middle = gene_set_MG_FM)

# Create an Euler diagram (a type of Venn diagram) with custom plot parameters
plot(
  euler(gene_sets_AC_MG_FM),
  quantities = TRUE,
  fills = c("red", "green")
)
grid.text(
  "Venn Diagram of DGE's for Astrocytes and Microglia Front vs Middle",
  x = 0.5,
  y = 0.90
)

# Find common genes
common_genes_MG_AC_FM <-
  Reduce(intersect, list(gene_set_MG_FM, gene_set_AC_FM))

# Print common genes
print(common_genes_MG_AC_FM)
##  [1] "Ier5"    "Itm2c"   "Klhdc8a" "Bok"     "Chrna4"  "Tshz2"   "Stard8" 
##  [8] "Phgdh"   "Sertm1"  "Ptpru"   "Napepld" "Slc29a4" "Dennd2a" "Hs3st2" 
## [15] "Adora2a" "Ank1"    "Rasd2"   "Gria1"   "Grin2c"  "Tunar"   "Clic6"
## FR
# Create a named list of gene sets
gene_sets_AC_MG_FR <-
  list(Astrocyte_Front_vs_Rear = gene_set_AC_FR,
       Microglia_Front_vs_Rear = gene_set_MG_FR)

# Create an Euler diagram (a type of Venn diagram) with custom plot parameters
plot(
  euler(gene_sets_AC_MG_FR),
  quantities = TRUE,
  fills = c("red", "green")
)
grid.text(
  "Venn Diagram of DGE's for Astrocytes and Microglia Front vs Rear",
  x = 0.5,
  y = 0.93
)

# Find common genes
common_genes_MG_AC_FR <-
  Reduce(intersect, list(gene_set_MG_FR, gene_set_AC_FR))

# Print common genes
print(common_genes_MG_AC_FR)
##   [1] "1700025G04Rik" "Sgpp2"         "Nmnat2"        "Rgs8"         
##   [5] "Ier5"          "Myo1b"         "Abl2"          "Actr3"        
##   [9] "Mcm3"          "Klhdc8a"       "Als2"          "Cntn2"        
##  [13] "Nfasc"         "Plekha6"       "Hsd11b1"       "Lrrfip1"      
##  [17] "Chil1"         "Syt2"          "Ildr2"         "Npas2"        
##  [21] "Rnf152"        "Adamts4"       "Rgs20"         "St18"         
##  [25] "Cables2"       "Slc27a4"       "Zfp341"        "Trp53inp2"    
##  [29] "Mmp24"         "Slc25a12"      "Gpr155"        "Epb41l1"      
##  [33] "Shf"           "Ndrg3"         "Osbpl6"        "Yme1l1"       
##  [37] "Mapk8ip1"      "Chst1"         "Fitm2"         "Rims4"        
##  [41] "Pamr1"         "Pltp"          "Pcif1"         "Cdh22"        
##  [45] "Slc20a1"       "Abtb2"         "Abca2"         "Garnl3"       
##  [49] "Bcas1"         "Bmp7"          "Nacc2"         "Gpsm1"        
##  [53] "Btbd3"         "Dab2ip"        "Rin2"          "Srp14"        
##  [57] "Pak6"          "Cst3"          "Syndig1"       "Nexmif"       
##  [61] "Cetn2"         "Atp2b3"        "Apln"          "Drp2"         
##  [65] "Tmem164"       "Arhgef9"       "Msn"           "Stard8"       
##  [69] "Efnb1"         "Zmym3"         "Hdac8"         "Rtca"         
##  [73] "F3"            "Gria2"         "Trpc3"         "Ndst3"        
##  [77] "Adamtsl4"      "Lrrc40"        "Setd7"         "Phgdh"        
##  [81] "Paqr6"         "Slc22a15"      "Dclk1"         "Car2"         
##  [85] "Lmo4"          "Inka2"         "Atp5pb"        "Lxn"          
##  [89] "Shc1"          "Tnik"          "Gnb4"          "Them4"        
##  [93] "Fbxo44"        "Lrp8"          "Cpt2"          "Spsb1"        
##  [97] "Whrn"          "Astn2"         "Epb41"         "Icmt"         
## [101] "Trabd2b"       "Dhdds"         "Pik3r3"        "Faap20"       
## [105] "Rcan3"         "Luzp1"         "Fndc10"        "Epha8"        
## [109] "Rimkla"        "Kcnq4"         "Smap2"         "Col9a2"       
## [113] "Gnl2"          "Trim62"        "Frrs1l"        "Epha7"        
## [117] "Car8"          "Chd7"          "Grin3a"        "Cds1"         
## [121] "Ctbp1"         "Steap2"        "Ociad2"        "Nat8l"        
## [125] "Dipk1a"        "Add1"          "Flt3"          "Kit"          
## [129] "Tmem165"       "Pan3"          "Grm3"          "Rgs12"        
## [133] "Dok7"          "Pitpnm2"       "Tmem132b"      "Wscd2"        
## [137] "Kcnh2"         "Tpst1"         "Caln1"         "Slc4a2"       
## [141] "Pxn"           "Lgi2"          "Sel1l3"        "Rell1"        
## [145] "Kcnk3"         "Slc30a3"       "Limk1"         "Tmem120a"     
## [149] "Sh2b2"         "Slc29a4"       "Ephb6"         "Fam131b"      
## [153] "Clstn3"        "Bcat1"         "Slc41a3"       "Ptms"         
## [157] "AI854703"      "Wnt7a"         "Lrig1"         "Tspan9"       
## [161] "Chn2"          "Pdzrn3"        "Lrrn1"         "Spr"          
## [165] "Rab11fip5"     "Grid2"         "Grm7"          "Ndnf"         
## [169] "Brpf1"         "Camk1"         "Atp6v1f"       "Ttll3"        
## [173] "Klhdc10"       "Ret"           "Dennd11"       "Erc1"         
## [177] "Tuba8"         "Gsg1l"         "Sbk1"          "Ccnd1"        
## [181] "Stard5"        "Il16"          "Pcsk6"         "Tlcd3b"       
## [185] "Kcnc3"         "Samd4b"        "Bcl7c"         "Lair1"        
## [189] "Ap3s2"         "Inpp5f"        "Syt5"          "Crtc3"        
## [193] "Grik5"         "Erf"           "Kcnc1"         "Ctbp2"        
## [197] "Mag"           "Tmem86a"       "Lgi4"          "Scn1b"        
## [201] "Inpp5a"        "Hs3st2"        "Cacng3"        "Pdcd5"        
## [205] "Chpt1"         "Cdc40"         "Cdk17"         "Atp2b1"       
## [209] "Rbms2"         "Gls2"          "Esyt1"         "Cnksr3"       
## [213] "Vsir"          "Unc5b"         "Sgpl1"         "Map3k5"       
## [217] "Epb41l2"       "Adora2a"       "Col18a1"       "Adarb1"       
## [221] "Slc1a6"        "Hcn2"          "Plk5"          "Nfic"         
## [225] "Pard6a"        "Arhgap10"      "Gfod2"         "Arhgef7"      
## [229] "Sorbs2"        "Prkaca"        "Nacc1"         "Nek1"         
## [233] "Cmip"          "Kcng4"         "Cbln1"         "Ank1"         
## [237] "Trim67"        "Fnta"          "Efnb2"         "Nalf1"        
## [241] "Car7"          "Trmt9b"        "Tppp3"         "Cryl1"        
## [245] "Slitrk5"       "Kctd9"         "Lgi3"          "Armh4"        
## [249] "Rnase4"        "Sall2"         "Gdf10"         "Fbxo9"        
## [253] "Kirrel3"       "Tmem266"       "1700017B05Rik" "Clmp"         
## [257] "Endod1"        "4931406C07Rik" "Zic1"          "Nectin1"      
## [261] "Fat3"          "Tmem25"        "Tmem108"       "Kank2"        
## [265] "Map2k1"        "Megf11"        "Anln"          "Rora"         
## [269] "Golga4"        "Mindy2"        "Trak1"         "Vipr1"        
## [273] "Hhatl"         "Snrk"          "Tmem158"       "Gria4"        
## [277] "Slc43a2"       "Bcl11a"        "Foxj1"         "Git1"         
## [281] "Lsm11"         "Rnd2"          "Traf4"         "Ubb"          
## [285] "Mpp3"          "Arhgap44"      "Lsm12"         "Shisa6"       
## [289] "Sowaha"        "Grn"           "Slc22a4"       "Adam11"       
## [293] "C1ql1"         "Rab40b"        "Sparc"         "Gria1"        
## [297] "Car4"          "Ppm1e"         "Septin4"       "Mink1"        
## [301] "Tmem100"       "Pfn1"          "Btbd17"        "Grin2c"       
## [305] "Fads6"         "Cdr2l"         "Spns2"         "Atp2a3"       
## [309] "Cacna1g"       "Samd14"        "Itga3"         "Meis1"        
## [313] "Btf3"          "Gfod1"         "Adcy2"         "Cd83"         
## [317] "Diras2"        "Auh"           "Prpf4b"        "Arrdc3"       
## [321] "Tspan17"       "Zfp346"        "Mef2c"         "Hpcal1"       
## [325] "Ttll5"         "Grhl1"         "Rapgef5"       "Gpr68"        
## [329] "Syt16"         "Hspa2"         "Atp6v1d"       "Tmem229b"     
## [333] "Exd2"          "Cyp46a1"       "Susd6"         "Inf2"         
## [337] "Shisa8"        "Mtss1"         "Tafa5"         "Khdrbs3"      
## [341] "Slc45a4"       "Cmbl"          "Slc38a1"       "Rnd1"         
## [345] "Asic1"         "Foxred2"       "Pvalb"         "Cdc42ep1"     
## [349] "Syngr1"        "Polr3h"        "Maf1"          "Tiam1"        
## [353] "Plcxd2"        "Ccdc74a"       "Nxpe3"         "Dgcr2"        
## [357] "Camk2n2"       "Map3k13"       "Kalrn"         "Daam2"        
## [361] "Plcl2"         "Tbc1d5"        "Nherf2"        "Abhd16a"      
## [365] "Mtcl1"         "Zdhhc14"       "Snx9"          "Ddx39b"       
## [369] "Epb41l3"       "Antkmt"        "Pla2g7"        "Grm4"         
## [373] "Tbc1d22b"      "Pde9a"         "Garem1"        "Synpo"        
## [377] "Ablim3"        "Apc"           "Stk32a"        "St8sia5"      
## [381] "Sall3"         "Pip5k1b"       "Afap1l2"       "Prkg1"        
## [385] "Ehd1"          "Ppp1r3c"       "Pdlim1"        "Chrm1"        
## [389] "Pi4k2a"        "Tut1"          "Incenp"        "Rab3il1"      
## [393] "Cnnm1"         "Sema4g"        "Tle4"          "Ina"          
## [397] "Aldh1a1"       "Cemip2"
## MR
# Create a named list of gene sets
gene_sets_AC_MG_MR <-
  list(Astrocyte_Middle_vs_Rear = gene_set_AC_MR,
       Microglia_Middle_vs_Rear = gene_set_MG_MR)

# Create an Euler diagram (a type of Venn diagram) with custom plot parameters
plot(
  euler(gene_sets_AC_MG_MR),
  quantities = TRUE,
  fills = c("red", "green")
)
grid.text(
  "Venn Diagram of DGE's for Astrocytes and Microglia Middle vs Rear",
  x = 0.5,
  y = 0.95
)

# Find common genes
common_genes_MG_AC_MR <-
  Reduce(intersect, list(gene_set_MG_MR, gene_set_AC_MR))

# Print common genes
print(common_genes_MG_AC_MR)
##   [1] "Vxn"           "Sgpp2"         "Rgs8"          "Cacna1e"      
##   [5] "Ier5"          "Stum"          "Capn2"         "Ptma"         
##   [9] "Brinp2"        "C130074G19Rik" "Ngef"          "Ankrd45"      
##  [13] "Klhdc8a"       "Als2"          "Cntn2"         "Nek2"         
##  [17] "Nfasc"         "Agap1"         "Atp2b4"        "Syt2"         
##  [21] "Ildr2"         "Fam78b"        "Rgs4"          "Bok"          
##  [25] "Pam"           "Kif21b"        "Rnf152"        "Ddx59"        
##  [29] "Pecr"          "Stk11ip"       "Scn3a"         "Usp6nl"       
##  [33] "Ttbk2"         "Mmp24"         "Rapgef4"       "Lrp4"         
##  [37] "Epb41l1"       "Shf"           "Dlgap4"        "Ndrg3"        
##  [41] "Src"           "Slc32a1"       "Ppp1r16b"      "Dhx35"        
##  [45] "Rims4"         "Pde1a"         "Cdh22"         "Sirpa"        
##  [49] "Prex1"         "Abca2"         "Kcnb1"         "Garnl3"       
##  [53] "Kcng1"         "Hspa5"         "Kcnt1"         "Plcb1"        
##  [57] "Nacc2"         "Gpsm1"         "Btbd3"         "Pcsk2"        
##  [61] "Dab2ip"        "Rasgrp1"       "Syndig1"       "Abhd12"       
##  [65] "Mmadhc"        "Pkp4"          "Slc4a10"       "Slc16a2"      
##  [69] "Gabra3"        "Gria3"         "Iqsec2"        "Apln"         
##  [73] "Fgf13"         "Wdr13"         "Msn"           "Jade3"        
##  [77] "Cdk16"         "Zmym3"         "Gpr88"         "Slitrk3"      
##  [81] "Plppr5"        "Rapgef2"       "F3"            "Gria2"        
##  [85] "Gucy1b1"       "Npy2r"         "St6galnac5"    "Sema6c"       
##  [89] "Prss12"        "Rab33b"        "Paqr6"         "Sertm1"       
##  [93] "Atp1a1"        "Impa1"         "Magi3"         "Lmo4"         
##  [97] "Lxn"           "Rpl22l1"       "Adgrb2"        "Slc25a33"     
## [101] "Spsb1"         "Whrn"          "Zfyve9"        "Astn2"        
## [105] "Epb41"         "Icmt"          "Paqr7"         "Pik3r3"       
## [109] "Man1c1"        "Faap20"        "Rcan3"         "Pithd1"       
## [113] "Fndc10"        "Tmem240"       "Aurkaip1"      "Epha8"        
## [117] "Camk2n1"       "Rimkla"        "Iffo2"         "Hpcal4"       
## [121] "Trim62"        "Rnf19b"        "Frrs1l"        "Lyn"          
## [125] "Akirin2"       "Car8"          "Chd7"          "Pdp1"         
## [129] "Il11ra1"       "Rnf20"         "Kctd8"         "Gabrb1"       
## [133] "Ctbp1"         "Nat8l"         "Kit"           "Pan3"         
## [137] "Pitpnm2"       "Fbrsl1"        "Crybb1"        "Evc2"         
## [141] "Tmem132b"      "Gsap"          "Wscd2"         "Rimbp2"       
## [145] "Acacb"         "Kcnh2"         "Caln1"         "Cabp1"        
## [149] "Actr3b"        "Lgi2"          "Sel1l3"        "Kcnk3"        
## [153] "Klhl5"         "Rph3a"         "Trafd1"        "Limk1"        
## [157] "Elfn1"         "Slc29a4"       "Actb"          "Slco1c1"      
## [161] "Ephb6"         "Golt1b"        "Fam131b"       "Rmnd5a"       
## [165] "Clstn3"        "Bcat1"         "Lrrtm1"        "Prickle2"     
## [169] "Pals2"         "Lrig1"         "Ccnd2"         "Tspan9"       
## [173] "Chn2"          "Wipf3"         "Scrn1"         "Grin2b"       
## [177] "Grid2"         "Grm7"          "Add2"          "Lmo3"         
## [181] "Strip2"        "Atp2b2"        "Plxnd1"        "Dennd11"      
## [185] "Tuba8"         "Sbk1"          "Iglon5"        "Il16"         
## [189] "Pcsk6"         "Slc8a2"        "Tlcd3b"        "Grm5"         
## [193] "Pld3"          "Me3"           "Kcnc3"         "Timm50"       
## [197] "Atf5"          "Fam174b"       "Swap70"        "Mfge8"        
## [201] "Mical2"        "Tenm4"         "Slc17a7"       "Inpp5f"       
## [205] "Shisa7"        "Isoc2a"        "U2af2"         "Arhgap33"     
## [209] "Grik5"         "Smg1"          "Ppme1"         "Gpr26"        
## [213] "Kcnc1"         "Ctbp2"         "Mag"           "Ptpn5"        
## [217] "Inpp5a"        "Cacng3"        "Hs3st4"        "Zfp365"       
## [221] "Agap2"         "Os9"           "Btg1"          "Atp2b1"       
## [225] "Phyhipl"       "Ppfia2"        "Gls2"          "P4ha1"        
## [229] "Zfc3h1"        "Esyt1"         "Cnksr3"        "Grm1"         
## [233] "Unc5b"         "Epb41l2"       "L3mbtl3"       "Icosl"        
## [237] "Slc1a6"        "Stk11"         "Plk5"          "Lingo3"       
## [241] "Nfic"          "Celf5"         "Appl2"         "Pard6a"       
## [245] "Arhgef7"       "Fat1"          "Slc25a4"       "Gas6"         
## [249] "Prkaca"        "Dlgap2"        "Cmip"          "Cpe"          
## [253] "Klhl2"         "Kcng4"         "Cbln1"         "Lpl"          
## [257] "Ncan"          "Cyld"          "Ank1"          "Cpne7"        
## [261] "Def8"          "Trim67"        "Fnta"          "Ctxn1"        
## [265] "Adgrg1"        "Cdh11"         "Nrp1"          "Car7"         
## [269] "Trmt9b"        "Zmiz1"         "Fbxo34"        "Lmo7"         
## [273] "Slitrk5"       "Erc2"          "Clu"           "Ptk2b"        
## [277] "Tmtc4"         "Lgi3"          "Hr"            "Armh4"        
## [281] "Vstm4"         "Cpne6"         "Emc9"          "Zbtb16"       
## [285] "Aplp2"         "Dixdc1"        "Kirrel3"       "Tpbg"         
## [289] "Tmem266"       "Nrgn"          "Camkv"         "Clmp"         
## [293] "Amotl1"        "Sema7a"        "Zic1"          "Dipk2a"       
## [297] "Mrps22"        "Atg4d"         "Lrrc49"        "Scn4b"        
## [301] "Kank2"         "Rgl3"          "2900052N01Rik" "Megf11"       
## [305] "Lactb"         "Arpp21"        "Rora"          "Mindy2"       
## [309] "Cck"           "Snrk"          "Gria4"         "Slc43a2"      
## [313] "Ccdc88a"       "Rapgefl1"      "Jup"           "Foxj1"        
## [317] "Git1"          "Wwc1"          "Rbfox3"        "Traf4"        
## [321] "Tlcd1"         "Mpp3"          "Nlk"           "Shisa6"       
## [325] "Arhgdia"       "Ntn1"          "Rab40b"        "Nmt1"         
## [329] "Acsl6"         "Sparc"         "Tex2"          "Septin4"      
## [333] "Mink1"         "Tmem100"       "Sdk2"          "Grin2c"       
## [337] "Cdr2l"         "Atp2a3"        "Cacna1g"       "Ppp1r9b"      
## [341] "Sgsm2"         "Enc1"          "Ptdss1"        "Phactr1"      
## [345] "Gfod1"         "Adcy2"         "Aldh5a1"       "Diras2"       
## [349] "Auh"           "Arrdc3"        "Mef2c"         "Pdlim7"       
## [353] "Homer1"        "Sv2c"          "Ptch1"         "Rasgrf2"      
## [357] "Pdia6"         "Odc1"          "Hpcal1"        "Ttll5"        
## [361] "Pnn"           "Lrfn5"         "Tmem229b"      "Bcl11b"       
## [365] "Cyp46a1"       "Sipa1l1"       "Rgs6"          "Cyria"        
## [369] "Sptb"          "Shisa8"        "Pde1b"         "Mtss1"        
## [373] "Shisal1"       "Cerk"          "Kcnq3"         "Zfr"          
## [377] "Trabd"         "Cdh18"         "Ankrd33b"      "Adgrb1"       
## [381] "Them6"         "Prickle1"      "Slc38a1"       "Cacnb3"       
## [385] "Ddn"           "Enpp2"         "Asic1"         "Kifc2"        
## [389] "Tamalin"       "Txn2"          "Pvalb"         "Cyth4"        
## [393] "Card10"        "Kcnj4"         "Mchr1"         "Plcxd2"       
## [397] "Tmem191"       "Lztr1"         "Dgcr2"         "Fam131a"      
## [401] "Kalrn"         "Ehmt2"         "Abhd16a"       "Mtcl1"        
## [405] "Antkmt"        "Mrpl28"        "Syngap1"       "Grm4"         
## [409] "Map4k3"        "Tmem178"       "Pde9a"         "Pknox1"       
## [413] "Cyp4f15"       "Synpo"         "Camk2a"        "Ablim3"       
## [417] "Nrep"          "Lrrtm2"        "Stk32a"        "Myo5b"        
## [421] "St8sia5"       "Zfp521"        "Jcad"          "Afap1l2"      
## [425] "Cfl1"          "Ltbp3"         "Ehd1"          "Chrm1"        
## [429] "Sema4g"        "Kcnip2"        "Psd"           "Trim8"        
## [433] "Ina"           "Gda"           "Pdcd4"         "Tcf7l2"       
## [437] "Bad"
# Save all the intersected genes
write.csv(common_genes_MG_AC_FM, "common_genes_MG_AC_FM.csv")
write.csv(common_genes_MG_AC_FR, "common_genes_MG_AC_FR.csv")
write.csv(common_genes_MG_AC_MR, "common_genes_MG_AC_MR.csv")
# read the data file with all samples
rawCounts <-
  read.table("raw_counts_cpm_after_norm_reorganised_220722.txt", header = TRUE)


# # USing biomaRt annotation  method.
# ensembl <- useMart("ensembl")
# 
# listDatasets(ensembl)
# 
# ensembl_mouse <- useDataset("mmusculus_gene_ensembl", mart = ensembl)
# 
# mouse_gene_ids <- rawCounts$ensembl_id
# mouse_gene_info <- getBM(attributes = c("ensembl_gene_id", "external_gene_name", "chromosome_name", "start_position", "end_position", "gene_biotype", "strand"),
#                          filters = "ensembl_gene_id",
#                          values = mouse_gene_ids,
#                          mart = ensembl_mouse)

# Changing name of column too merge the values with a matching column.
colnames(mouse_gene_info)[c(1)] <- c("ensembl_id")
# Merging the two dataframes into one dataframe leaving out one
rawCounts <- merge(rawCounts, mouse_gene_info, c(1))

rawCounts <- rawCounts[,-c(1, 38:42)]

colnames(rawCounts)[c(36)] <-  c("GeneNames")

# keep only the first occurrence of each duplicated gene name
rawCounts <- rawCounts[!duplicated(rawCounts$GeneNames),]

# add gene name to  row names 
row.names(rawCounts) <-  rawCounts$GeneNames
rawCounts <- rawCounts[,-36]

# Keep only rows where all values are above 1
rawCounts <- rawCounts[apply(rawCounts, 1, function(x) all(x > 1)), ]


# separating the columns if ribotag and totalRNA  
# into  separated dataframe object for the rawCounts and sampleInfo
rawCounts_RT <- rawCounts[, grep("_RT", colnames(rawCounts))]
#rawCounts_TR <- rawCounts[, grep("_TR", colnames(rawCounts))]

rawCounts_RT <- rawCounts_RT[order(colnames(rawCounts_RT))]

rawCounts_RT$AC_F <- rowMeans(rawCounts_RT[,1:2])
rawCounts_RT$AC_M <- rowMeans(rawCounts_RT[,3:5])
rawCounts_RT$AC_R <- rowMeans(rawCounts_RT[,6:8])

rawCounts_RT$MG_F <- rowMeans(rawCounts_RT[,9:11])
rawCounts_RT$MG_M <- rowMeans(rawCounts_RT[,12:14])
rawCounts_RT$MG_R <- rowMeans(rawCounts_RT[,15:17])

rawCounts_RT <- rawCounts_RT[,-c(1:17)]


#rawCounts_TR <- rawCounts_TR[order(colnames(rawCounts_TR))]


# making function for calculating z score since scale function is not ideal
calc_zscore <- function(df) {
  result <- (df - mean(df)) / sd(df)
  return(result)
}

# calculating the Z-score of ribotag over the Z-score of totalRNA
# calculating the z-score
Z_score_RT <- apply(rawCounts_RT, 1, calc_zscore)
Z_score_RT <- data.frame(t(Z_score_RT))

Z_score_RT <- Z_score_RT[apply(Z_score_RT, 1, function(x) {all(x > -1.1 & x < 1.1)}),]

sample_ratios <- data.frame(
  AC_F = Z_score_RT[, grep("AC_F", colnames(Z_score_RT))],
  AC_M = Z_score_RT[, grep("AC_M", colnames(Z_score_RT))],
  AC_R = Z_score_RT[, grep("AC_R", colnames(Z_score_RT))],
  MG_F = Z_score_RT[, grep("MG_F", colnames(Z_score_RT))],
  MG_M = Z_score_RT[, grep("MG_M", colnames(Z_score_RT))],
  MG_R = Z_score_RT[, grep("MG_R", colnames(Z_score_RT))]
)

rownames(sample_ratios) <- rownames(Z_score_RT)
sample_ratios <- na.omit(sample_ratios)
# Convert the dataframe to a binary format: 1 if not NA, 0 otherwise
binary_matrix <- as.data.frame(lapply(sample_ratios, function(x) as.integer(x)))

# Preserve the row names in the binary matrix
row.names(binary_matrix) <- row.names(sample_ratios)


# Create the UpSet plot
upset(
  binary_matrix,
  sets = c("AC_F", "AC_M", "AC_R", "MG_F", "MG_M", "MG_R"),
  order.by = "freq",
  mainbar.y.label = "Intersection Size",
  sets.bar.color = "black"
)

Matrix_Samples <- as.matrix(sample_ratios)
Matrix_Samples <- na.omit(Matrix_Samples)
pheatmap(Matrix_Samples)

Upset_heatmap <- pheatmap(
  t(Matrix_Samples),
  cluster_cols = T,
  cluster_rows = F,
  main = "Z-score of each celltype's average across regions",
  fontsize = 5,
  cellwidth = 4.5,
  cellheight = 30
  
)

ggsave(
  filename = "UpsetheatmapPlot1.png",
  plot = Upset_heatmap,
  width = 20,
  height = 13
)

binary_matrix$GeneNames <- rownames(binary_matrix)
write_xlsx(binary_matrix, "binary_matrix_upsetPlot.xlsx")
sample_ratios$GeneNames <- rownames(sample_ratios)
write_xlsx(sample_ratios,
"UpsetPlot_GeneList.xlsx")
# read the data file with all samples
rawCounts <-
  read.table("raw_counts_cpm_after_norm_reorganised_220722.txt", header = TRUE)


# Load sample information data
sampleInfo <- read.table("Sample-info_220722.txt", header = TRUE)

# Load cell type marker genes data from an Excel file
markerGenes <- read_excel("Celltype_markergenes.xlsx")


# ensembl <- useMart("ensembl")
# listDatasets(ensembl)
# 
# ensembl_mouse <- useDataset("mmusculus_gene_ensembl", mart = ensembl)
# 
# mouse_gene_ids <- rawCounts$ensembl_id
# mouse_gene_info <- getBM(attributes = c("ensembl_gene_id", "external_gene_name", "chromosome_name", "start_position", "end_position", "gene_biotype", "strand"),
#                          filters = "ensembl_gene_id",
#                          values = mouse_gene_ids,
#                          mart = ensembl_mouse)


# Changing name of column too merge the values with a matching column.
colnames(mouse_gene_info)[c(1)] <- c("ensembl_id")
# Merging the two dataframes into one dataframe leaving out one
rawCounts <- merge(rawCounts, mouse_gene_info)

rawCounts <- rawCounts[,-c(1, 38:42)]

colnames(rawCounts)[c(36)] <-  c("GeneNames")
########
# keep only the first occurrence of each duplicated gene name
rawCounts <- rawCounts[!duplicated(rawCounts$GeneNames),]


row.names(rawCounts) <-  rawCounts$GeneNames
rawCounts <- rawCounts[,-36]

rawCounts_RT <- rawCounts[,1:17]
# Plot two genes only RT

# making function for calculating z score since scale function is not ideal
calc_zscore <- function(df) {
  result <- (df - mean(df)) / sd(df)
  return(result)
}

Z_score_RT <- apply(rawCounts_RT, 1, calc_zscore)
Z_score_RT <- as.data.frame(t(Z_score_RT))
TwoGenes <- data.frame(TwoGenes <- Z_score_RT[c("Ndufb4c", "Ndufb11"), ])
TwoGenes$genenames <- rownames(TwoGenes)

# Get my data into a long format
TwoGenes_boxPlot <-
  gather(TwoGenes, key = "samples", value = "values",-c(genenames))

# Filter samples by the specified regions
regions <- c("_F", "_M", "_R")
TwoGenes_boxPlot <- TwoGenes_boxPlot %>%
  filter(str_detect(samples, paste0("AC", "|", "MG")) &
           str_detect(samples, paste(regions, collapse = "|")))

# Assign groups based on the cell type
TwoGenes_boxPlot$group <-
  ifelse(str_detect(TwoGenes_boxPlot$samples, "AC"),
         "Astrocytes",
         "Microglia")

# Function to create a boxplot for the specified regions
create_boxplot <- function(region1, region2) {
  # Filter samples by the specified regions
  TwoGenes_boxPlot_filtered <- TwoGenes_boxPlot %>%
    filter(str_detect(samples, paste(c(region1, region2), collapse = "|")))
  
  # Create the boxplot
  ggplot(TwoGenes_boxPlot_filtered,
         aes(x = genenames, y = values, fill = group)) +
    geom_boxplot() +
    scale_fill_manual(values = c("green", "red")) +
    theme_light() +
    ggtitle(paste("Boxplot for", region1, "and", region2, "regions"))
}

# Create separate boxplots for the specified region pairs
boxplot_front_middle <- create_boxplot("_F", "_M")
boxplot_front_rare <- create_boxplot("_F", "_R")
boxplot_middle_rare <- create_boxplot("_M", "_R")

# Display the boxplots
boxplot_front_middle

boxplot_front_rare

boxplot_middle_rare

# Function to assign region labels to the samples
assign_region <- function(samples) {
  if (str_detect(samples, "_F")) {
    return("Front")
  } else if (str_detect(samples, "_M")) {
    return("Middle")
  } else if (str_detect(samples, "_R")) {
    return("Rare")
  } else {
    return(NA)
  }
}

# Assign region labels to the samples
TwoGenes_boxPlot$region <-
  sapply(TwoGenes_boxPlot$samples, assign_region)

# Filter samples by the specified regions
TwoGenes_boxPlot_filtered <- TwoGenes_boxPlot %>%
  filter(str_detect(samples, paste(regions, collapse = "|")))

# Create the boxplot with separate facets for each region pair
ggplot(TwoGenes_boxPlot_filtered,
       aes(x = genenames, y = values, fill = group)) +
  geom_boxplot() +
  scale_fill_manual(values = c("green", "red")) +
  theme_light() +
  facet_wrap( ~ region, ncol = 3) +
  ggtitle("Boxplots for gene expression per region for astrocytes and microglia, Ribotag data")